From cfdadcbd2b529cd9ac721509a7ebafe436afcd8d Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Thu, 7 May 2015 00:21:10 -0700 Subject: [PATCH 001/320] [SPARK-7430] [STREAMING] [TEST] General improvements to streaming tests to increase debuggability Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #5961 from tdas/SPARK-7430 and squashes the following commits: d654978 [Tathagata Das] Fix scala style fbf7174 [Tathagata Das] Added more verbose assert failure messages. 6aea07a [Tathagata Das] Ensure SynchronizedBuffer is used in every TestSuiteBase --- .../spark/streaming/TestSuiteBase.scala | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala index 4d0cd7516f42e..4f70ae7f1f187 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -73,9 +73,11 @@ class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], * * The buffer contains a sequence of RDD's, each containing a sequence of items */ -class TestOutputStream[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { +class TestOutputStream[T: ClassTag]( + parent: DStream[T], + val output: SynchronizedBuffer[Seq[T]] = + new ArrayBuffer[Seq[T]] with SynchronizedBuffer[Seq[T]] + ) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output += collected }) { @@ -95,8 +97,10 @@ class TestOutputStream[T: ClassTag](parent: DStream[T], * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each * containing a sequence of items. 
*/ -class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) +class TestOutputStreamWithPartitions[T: ClassTag]( + parent: DStream[T], + val output: SynchronizedBuffer[Seq[Seq[T]]] = + new ArrayBuffer[Seq[Seq[T]]] with SynchronizedBuffer[Seq[Seq[T]]]) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.glom().collect().map(_.toSeq) output += collected @@ -108,10 +112,6 @@ class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], ois.defaultReadObject() output.clear() } - - def toTestOutputStream: TestOutputStream[T] = { - new TestOutputStream[T](this.parent, this.output.map(_.flatten)) - } } /** @@ -425,12 +425,21 @@ trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { logInfo("--------------------------------") // Match the output with the expected output - assert(output.size === expectedOutput.size, "Number of outputs do not match") for (i <- 0 until output.size) { if (useSet) { - assert(output(i).toSet === expectedOutput(i).toSet) + assert( + output(i).toSet === expectedOutput(i).toSet, + s"Set comparison failed\n" + + s"Expected output (${expectedOutput.size} items):\n${expectedOutput.mkString("\n")}\n" + + s"Generated output (${output.size} items): ${output.mkString("\n")}" + ) } else { - assert(output(i).toList === expectedOutput(i).toList) + assert( + output(i).toList === expectedOutput(i).toList, + s"Ordered list comparison failed\n" + + s"Expected output (${expectedOutput.size} items):\n${expectedOutput.mkString("\n")}\n" + + s"Generated output (${output.size} items): ${output.mkString("\n")}" + ) } } logInfo("Output verified successfully") From 01187f59b3d118495b6cfea965690829b99a36fa Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Thu, 7 May 2015 00:24:44 -0700 Subject: [PATCH 002/320] [SPARK-7217] [STREAMING] Add configuration to control the default behavior of StreamingContext.stop() implicitly calling SparkContext.stop() In environments like notebooks, the SparkContext is managed by the underlying infrastructure and it is expected that the SparkContext will not be stopped. However, StreamingContext.stop() calls SparkContext.stop() as a non-intuitive side-effect. This PR adds a configuration in SparkConf that sets the default StreamingContext stop behavior. It should be such that the existing behavior does not change for existing users. Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #5929 from tdas/SPARK-7217 and squashes the following commits: 869a763 [Tathagata Das] Changed implementation. 685fe00 [Tathagata Das] Added configuration --- .../spark/streaming/StreamingContext.scala | 10 +++++++--- .../streaming/StreamingContextSuite.scala | 19 +++++++++++++++++-- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index b1ad0d42ec8a6..bbdb4e8af036c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -563,13 +563,17 @@ class StreamingContext private[streaming] ( /** * Stop the execution of the streams immediately (does not wait for all received data - * to be processed). + * to be processed). By default, if `stopSparkContext` is not specified, the underlying + * SparkContext will also be stopped. 
This implicit behavior can be configured using the + * SparkConf configuration spark.streaming.stopSparkContextByDefault. * - * @param stopSparkContext if true, stops the associated SparkContext. The underlying SparkContext + * @param stopSparkContext If true, stops the associated SparkContext. The underlying SparkContext * will be stopped regardless of whether this StreamingContext has been * started. */ - def stop(stopSparkContext: Boolean = true): Unit = synchronized { + def stop( + stopSparkContext: Boolean = conf.getBoolean("spark.streaming.stopSparkContextByDefault", true) + ): Unit = synchronized { stop(stopSparkContext, false) } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 5207b7109e69b..a589deb1fa579 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -118,6 +118,11 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w assert(ssc.state === ssc.StreamingContextState.Started) ssc.stop() assert(ssc.state === ssc.StreamingContextState.Stopped) + + // Make sure that the SparkContext is also stopped by default + intercept[Exception] { + ssc.sparkContext.makeRDD(1 to 10) + } } test("start multiple times") { @@ -154,16 +159,26 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w } test("stop only streaming context") { - ssc = new StreamingContext(master, appName, batchDuration) + val conf = new SparkConf().setMaster(master).setAppName(appName) + + // Explicitly do not stop SparkContext + ssc = new StreamingContext(conf, batchDuration) sc = ssc.sparkContext addInputStream(ssc).register() ssc.start() ssc.stop(stopSparkContext = false) assert(sc.makeRDD(1 to 100).collect().size === 100) - ssc = new StreamingContext(sc, batchDuration) + sc.stop() + + // Implicitly do not stop SparkContext + conf.set("spark.streaming.stopSparkContextByDefault", "false") + ssc = new StreamingContext(conf, batchDuration) + sc = ssc.sparkContext addInputStream(ssc).register() ssc.start() ssc.stop() + assert(sc.makeRDD(1 to 100).collect().size === 100) + sc.stop() } test("stop(stopSparkContext=true) after stop(stopSparkContext=false)") { From fa8fddffd52f8146ccceb72c2990607aaf5b2131 Mon Sep 17 00:00:00 2001 From: Shiti <ssaxena.ece@gmail.com> Date: Thu, 7 May 2015 01:00:29 -0700 Subject: [PATCH 003/320] [SPARK-7295][SQL] bitwise operations for DataFrame DSL Author: Shiti <ssaxena.ece@gmail.com> Closes #5867 from Shiti/spark-7295 and squashes the following commits: 71a9913 [Shiti] implementation for bitwise and,or, not and xor on Column with tests and docs --- python/pyspark/sql/dataframe.py | 5 +++ python/pyspark/sql/functions.py | 2 ++ python/pyspark/sql/tests.py | 13 ++++++++ .../scala/org/apache/spark/sql/Column.scala | 31 +++++++++++++++++ .../org/apache/spark/sql/functions.scala | 8 +++++ .../spark/sql/ColumnExpressionSuite.scala | 33 +++++++++++++++++-- .../spark/sql/DataFrameFunctionsSuite.scala | 7 ++++ 7 files changed, 97 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 24f370543def4..cee804f5cc1f7 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1277,6 +1277,11 @@ def __init__(self, jc): __contains__ = _bin_op("contains") __getitem__ = _bin_op("getItem") + # bitwise operators + 
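+    # each maps to the like-named bitwise method added to Column on the JVM side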
bitwiseOR = _bin_op("bitwiseOR") + bitwiseAND = _bin_op("bitwiseAND") + bitwiseXOR = _bin_op("bitwiseXOR") + def getItem(self, key): """An expression that gets an item at position `ordinal` out of a list, or gets an item by key out of a dict. diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 692af868dd534..274c410a1ee9c 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -104,6 +104,8 @@ def _(col1, col2): 'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' + 'measured in radians.', + 'bitwiseNOT': 'Computes bitwise not.', + 'max': 'Aggregate function: returns the maximum value of the expression in a group.', 'min': 'Aggregate function: returns the minimum value of the expression in a group.', 'first': 'Aggregate function: returns the first value in a group.', diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index b232f3a965526..45dfedce22add 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -645,6 +645,19 @@ def test_fillna(self): self.assertEqual(row.age, None) self.assertEqual(row.height, None) + def test_bitwise_operations(self): + from pyspark.sql import functions + row = Row(a=170, b=75) + df = self.sqlCtx.createDataFrame([row]) + result = df.select(df.a.bitwiseAND(df.b)).collect()[0].asDict() + self.assertEqual(170 & 75, result['(a & b)']) + result = df.select(df.a.bitwiseOR(df.b)).collect()[0].asDict() + self.assertEqual(170 | 75, result['(a | b)']) + result = df.select(df.a.bitwiseXOR(df.b)).collect()[0].asDict() + self.assertEqual(170 ^ 75, result['(a ^ b)']) + result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict() + self.assertEqual(~75, result['~b']) + class HiveContextSQLTests(ReusedPySparkTestCase): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 8eb632d3d600b..8bbe11b412214 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -698,6 +698,37 @@ class Column(protected[sql] val expr: Expression) extends Logging { println(expr.prettyString) } } + + /** + * Compute bitwise OR of this expression with another expression. + * {{{ + * df.select($"colA".bitwiseOR($"colB")) + * }}} + * + * @group expr_ops + */ + def bitwiseOR(other: Any): Column = BitwiseOr(expr, lit(other).expr) + + /** + * Compute bitwise AND of this expression with another expression. + * {{{ + * df.select($"colA".bitwiseAND($"colB")) + * }}} + * + * @group expr_ops + */ + def bitwiseAND(other: Any): Column = BitwiseAnd(expr, lit(other).expr) + + /** + * Compute bitwise XOR of this expression with another expression. + * {{{ + * df.select($"colA".bitwiseXOR($"colB")) + * }}} + * + * @group expr_ops + */ + def bitwiseXOR(other: Any): Column = BitwiseXor(expr, lit(other).expr) + } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 830b5017717b5..1728b0b8c910e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -438,6 +438,14 @@ object functions { */ def upper(e: Column): Column = Upper(e.expr) + + /** + * Computes bitwise NOT. 
+ * + * @group normal_funcs + */ + def bitwiseNOT(e: Column): Column = BitwiseNot(e.expr) + ////////////////////////////////////////////////////////////////////////////////////////////// // Math Functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 3c1ad656fc855..d96186c268720 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -27,8 +27,6 @@ import org.apache.spark.sql.types._ class ColumnExpressionSuite extends QueryTest { import org.apache.spark.sql.TestData._ - // TODO: Add test cases for bitwise operations. - test("collect on column produced by a binary operator") { val df = Seq((1, 2, 3)).toDF("a", "b", "c") checkAnswer(df.select(df("a") + df("b")), Seq(Row(3))) @@ -385,4 +383,35 @@ class ColumnExpressionSuite extends QueryTest { assert(row.getDouble(1) >= -4.0) } } + + test("bitwiseAND") { + checkAnswer( + testData2.select($"a".bitwiseAND(75)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) & 75))) + + checkAnswer( + testData2.select($"a".bitwiseAND($"b").bitwiseAND(22)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) & r.getInt(1) & 22))) + } + + test("bitwiseOR") { + checkAnswer( + testData2.select($"a".bitwiseOR(170)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) | 170))) + + checkAnswer( + testData2.select($"a".bitwiseOR($"b").bitwiseOR(42)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) | r.getInt(1) | 42))) + } + + test("bitwiseXOR") { + checkAnswer( + testData2.select($"a".bitwiseXOR(112)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) ^ 112))) + + checkAnswer( + testData2.select($"a".bitwiseXOR($"b").bitwiseXOR(39)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) ^ r.getInt(1) ^ 39))) + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index ca03713ef4658..b1e0faa310b68 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.sql.TestData._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.TestSQLContext.implicits._ import org.apache.spark.sql.types._ @@ -81,4 +82,10 @@ class DataFrameFunctionsSuite extends QueryTest { struct(col("a") * 2) } } + + test("bitwiseNOT") { + checkAnswer( + testData2.select(bitwiseNOT($"a")), + testData2.collect().toSeq.map(r => Row(~r.getInt(0)))) + } } From fae4e2d6094de57a438ee4188ce47fc5b01b96fe Mon Sep 17 00:00:00 2001 From: ksonj <kson@siberie.de> Date: Thu, 7 May 2015 01:02:00 -0700 Subject: [PATCH 004/320] [SPARK-7035] Encourage __getitem__ over __getattr__ on column access in the Python DataFrame API Author: ksonj <kson@siberie.de> Closes #5971 from ksonj/doc and squashes the following commits: dadfebb [ksonj] __getitem__ is cleaner than __getattr__ --- docs/sql-programming-guide.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index b8233ae06fdf3..df4c123bdd86c 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -139,7 +139,6 
@@ DataFrames provide a domain-specific language for structured data manipulation i Here we include some basic examples of structured data processing using DataFrames: - <div class="codetabs"> <div data-lang="scala" markdown="1"> {% highlight scala %} @@ -242,6 +241,12 @@ df.groupBy("age").count().show(); </div> <div data-lang="python" markdown="1"> +In Python it's possible to access a DataFrame's columns either by attribute +(`df.age`) or by indexing (`df['age']`). While the former is convenient for +interactive data exploration, users are highly encouraged to use the +latter form, which is future proof and won't break with column names that +are also attributes on the DataFrame class. + {% highlight python %} from pyspark.sql import SQLContext sqlContext = SQLContext(sc) @@ -270,14 +275,14 @@ df.select("name").show() ## Justin # Select everybody, but increment the age by 1 -df.select(df.name, df.age + 1).show() +df.select(df['name'], df['age'] + 1).show() ## name (age + 1) ## Michael null ## Andy 31 ## Justin 20 # Select people older than 21 -df.filter(df.age > 21).show() +df.filter(df['age'] > 21).show() ## age name ## 30 Andy From 8b6b46e4ff5f19fb7befecaaa0eda63bf29a0e2c Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" <joseph@databricks.com> Date: Thu, 7 May 2015 01:12:14 -0700 Subject: [PATCH 005/320] [SPARK-7421] [MLLIB] OnlineLDA cleanups Small changes, primarily to allow us more flexibility in the future: * Rename "tau_0" to "tau0" * Mark LDAOptimizer trait sealed and DeveloperApi. * Mark LDAOptimizer subclasses as final. * Mark setOptimizer (the one taking an LDAOptimizer) and getOptimizer as DeveloperApi since we may need to change them in the future CC: hhbyyh Author: Joseph K. Bradley <joseph@databricks.com> Closes #5956 from jkbradley/onlinelda-cleanups and squashes the following commits: f4be508 [Joseph K. Bradley] added newline f4003e4 [Joseph K. Bradley] Changes: * Rename "tau_0" to "tau0" * Mark LDAOptimizer trait sealed and DeveloperApi. * Mark LDAOptimizer subclasses as final. 
* Mark setOptimizer (the one taking an LDAOptimizer) and getOptimizer as DeveloperApi since we may need to change them in the future --- .../apache/spark/mllib/clustering/LDA.scala | 15 ++++++-- .../spark/mllib/clustering/LDAOptimizer.scala | 37 +++++++++---------- .../spark/mllib/clustering/JavaLDASuite.java | 2 +- .../spark/mllib/clustering/LDASuite.scala | 8 ++-- 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala index c8daa2388e868..a410547a72fda 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala @@ -18,8 +18,9 @@ package org.apache.spark.mllib.clustering import breeze.linalg.{DenseVector => BDV} + import org.apache.spark.Logging -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.JavaPairRDD import org.apache.spark.graphx._ import org.apache.spark.mllib.linalg.Vector @@ -197,12 +198,20 @@ class LDA private ( } - /** LDAOptimizer used to perform the actual calculation */ + /** + * :: DeveloperApi :: + * + * LDAOptimizer used to perform the actual calculation + */ + @DeveloperApi def getOptimizer: LDAOptimizer = ldaOptimizer /** + * :: DeveloperApi :: + * * LDAOptimizer used to perform the actual calculation (default = EMLDAOptimizer) */ + @DeveloperApi def setOptimizer(optimizer: LDAOptimizer): this.type = { this.ldaOptimizer = optimizer this @@ -210,7 +219,7 @@ class LDA private ( /** * Set the LDAOptimizer used to perform the actual calculation by algorithm name. - * Currently "em", "online" is supported. + * Currently "em", "online" are supported. */ def setOptimizer(optimizerName: String): this.type = { this.ldaOptimizer = diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index 093aa0f315ab2..6fa2fe053c6a4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -23,7 +23,7 @@ import breeze.linalg.{DenseVector => BDV, DenseMatrix => BDM, sum, normalize, kr import breeze.numerics.{digamma, exp, abs} import breeze.stats.distributions.{Gamma, RandBasis} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.graphx._ import org.apache.spark.graphx.impl.GraphImpl import org.apache.spark.mllib.impl.PeriodicGraphCheckpointer @@ -31,13 +31,13 @@ import org.apache.spark.mllib.linalg.{Matrices, SparseVector, DenseVector, Vecto import org.apache.spark.rdd.RDD /** - * :: Experimental :: + * :: DeveloperApi :: * * An LDAOptimizer specifies which optimization/learning/inference algorithm to use, and it can * hold optimizer-specific parameters for users to set. */ -@Experimental -trait LDAOptimizer { +@DeveloperApi +sealed trait LDAOptimizer { /* DEVELOPERS NOTE: @@ -59,7 +59,7 @@ trait LDAOptimizer { } /** - * :: Experimental :: + * :: DeveloperApi :: * * Optimizer for EM algorithm which stores data + parameter graph, plus algorithm parameters. * @@ -75,8 +75,8 @@ trait LDAOptimizer { * "On Smoothing and Inference for Topic Models." UAI, 2009. 
* */ -@Experimental -class EMLDAOptimizer extends LDAOptimizer { +@DeveloperApi +final class EMLDAOptimizer extends LDAOptimizer { import LDA._ @@ -211,7 +211,7 @@ class EMLDAOptimizer extends LDAOptimizer { /** - * :: Experimental :: + * :: DeveloperApi :: * * An online optimizer for LDA. The Optimizer implements the Online variational Bayes LDA * algorithm, which processes a subset of the corpus on each iteration, and updates the term-topic @@ -220,8 +220,8 @@ class EMLDAOptimizer extends LDAOptimizer { * Original Online LDA paper: * Hoffman, Blei and Bach, "Online Learning for Latent Dirichlet Allocation." NIPS, 2010. */ -@Experimental -class OnlineLDAOptimizer extends LDAOptimizer { +@DeveloperApi +final class OnlineLDAOptimizer extends LDAOptimizer { // LDA common parameters private var k: Int = 0 @@ -243,8 +243,8 @@ class OnlineLDAOptimizer extends LDAOptimizer { private var randomGenerator: java.util.Random = null // Online LDA specific parameters - // Learning rate is: (tau_0 + t)^{-kappa} - private var tau_0: Double = 1024 + // Learning rate is: (tau0 + t)^{-kappa} + private var tau0: Double = 1024 private var kappa: Double = 0.51 private var miniBatchFraction: Double = 0.05 @@ -265,16 +265,16 @@ class OnlineLDAOptimizer extends LDAOptimizer { * A (positive) learning parameter that downweights early iterations. Larger values make early * iterations count less. */ - def getTau_0: Double = this.tau_0 + def getTau0: Double = this.tau0 /** * A (positive) learning parameter that downweights early iterations. Larger values make early * iterations count less. * Default: 1024, following the original Online LDA paper. */ - def setTau_0(tau_0: Double): this.type = { - require(tau_0 > 0, s"LDA tau_0 must be positive, but was set to $tau_0") - this.tau_0 = tau_0 + def setTau0(tau0: Double): this.type = { + require(tau0 > 0, s"LDA tau0 must be positive, but was set to $tau0") + this.tau0 = tau0 this } @@ -434,11 +434,8 @@ class OnlineLDAOptimizer extends LDAOptimizer { * Update lambda based on the batch submitted. batchSize can be different for each iteration. */ private[clustering] def update(stat: BDM[Double], iter: Int, batchSize: Int): Unit = { - val tau_0 = this.getTau_0 - val kappa = this.getKappa - // weight of the mini-batch. - val weight = math.pow(tau_0 + iter, -kappa) + val weight = math.pow(getTau0 + iter, -getKappa) // Update lambda based on documents. 
lambda = lambda * (1 - weight) + diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index f394d903966de..96c2da169961f 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -117,7 +117,7 @@ public void OnlineOptimizerCompatibility() { // Train a model OnlineLDAOptimizer op = new OnlineLDAOptimizer() - .setTau_0(1024) + .setTau0(1024) .setKappa(0.51) .setGammaShape(1e40) .setMiniBatchFraction(0.5); diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala index 2dcc881f5abd2..d5b7d96335744 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala @@ -138,12 +138,12 @@ class LDASuite extends FunSuite with MLlibTestSparkContext { val lda = new LDA().setK(2) val corpus = sc.parallelize(tinyCorpus, 2) val op = new OnlineLDAOptimizer().initialize(corpus, lda) - op.setKappa(0.9876).setMiniBatchFraction(0.123).setTau_0(567) + op.setKappa(0.9876).setMiniBatchFraction(0.123).setTau0(567) assert(op.getAlpha == 0.5) // default 1.0 / k assert(op.getEta == 0.5) // default 1.0 / k assert(op.getKappa == 0.9876) assert(op.getMiniBatchFraction == 0.123) - assert(op.getTau_0 == 567) + assert(op.getTau0 == 567) } test("OnlineLDAOptimizer one iteration") { @@ -159,7 +159,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext { val corpus = sc.parallelize(docs, 2) // Set GammaShape large to avoid the stochastic impact. - val op = new OnlineLDAOptimizer().setTau_0(1024).setKappa(0.51).setGammaShape(1e40) + val op = new OnlineLDAOptimizer().setTau0(1024).setKappa(0.51).setGammaShape(1e40) .setMiniBatchFraction(1) val lda = new LDA().setK(k).setMaxIterations(1).setOptimizer(op).setSeed(12345) @@ -192,7 +192,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext { ).zipWithIndex.map { case (wordCounts, docId) => (docId.toLong, wordCounts) } val docs = sc.parallelize(toydata) - val op = new OnlineLDAOptimizer().setMiniBatchFraction(1).setTau_0(1024).setKappa(0.51) + val op = new OnlineLDAOptimizer().setMiniBatchFraction(1).setTau0(1024).setKappa(0.51) .setGammaShape(1e10) val lda = new LDA().setK(2) .setDocConcentration(0.01) From 4f87e9562aa0dfe5467d7fbaba9278213106377c Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" <joseph@databricks.com> Date: Thu, 7 May 2015 01:28:44 -0700 Subject: [PATCH 006/320] [SPARK-7429] [ML] Params cleanups Params.setDefault taking a set of ParamPairs should be annotated with varargs. I thought it would not work before, but it apparently does. CrossValidator.transform should call transformSchema since the underlying Model might be a PipelineModel CC: mengxr Author: Joseph K. Bradley <joseph@databricks.com> Closes #5960 from jkbradley/params-cleanups and squashes the following commits: 118b158 [Joseph K. Bradley] Params.setDefault taking a set of ParamPairs should be annotated with varargs. I thought it would not work before, but it apparently does. 
CrossValidator.transform should call transformSchema since the underlying Model might be a PipelineModel --- mllib/src/main/scala/org/apache/spark/ml/param/params.scala | 4 +--- .../scala/org/apache/spark/ml/tuning/CrossValidator.scala | 3 ++- .../test/java/org/apache/spark/ml/param/JavaTestParams.java | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 51ce19d29cd29..6d09962fe6ee2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -366,13 +366,11 @@ trait Params extends Identifiable with Serializable { /** * Sets default values for a list of params. * - * Note: Java developers should use the single-parameter [[setDefault()]]. - * Annotating this with varargs causes compilation failures. - * * @param paramPairs a list of param pairs that specify params and their default values to set * respectively. Make sure that the params are initialized before this method * gets called. */ + @varargs protected final def setDefault(paramPairs: ParamPair[_]*): this.type = { paramPairs.foreach { p => setDefault(p.param.asInstanceOf[Param[Any]], p.value) diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index 9208127eb1d79..ac0d1fed84b2e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -105,7 +105,7 @@ class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorP override def fit(dataset: DataFrame): CrossValidatorModel = { val schema = dataset.schema - transformSchema(dataset.schema, logging = true) + transformSchema(schema, logging = true) val sqlCtx = dataset.sqlContext val est = $(estimator) val eval = $(evaluator) @@ -159,6 +159,7 @@ class CrossValidatorModel private[ml] ( } override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) bestModel.transform(dataset) } diff --git a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java index 8abe575610d19..532eca47918fc 100644 --- a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java +++ b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java @@ -59,5 +59,6 @@ public JavaTestParams() { ParamValidators.inArray(validStrings)); setDefault(myIntParam, 1); setDefault(myDoubleParam, 0.5); + setDefault(myIntParam.w(1), myDoubleParam.w(0.5)); } } From ed9be06a4797bbb678355b361054c8872ac20b75 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang <daoyuan.wang@intel.com> Date: Thu, 7 May 2015 10:05:01 -0700 Subject: [PATCH 007/320] [SPARK-7330] [SQL] avoid NPE at jdbc rdd Thank nadavoosh point this out in #5590 Author: Daoyuan Wang <daoyuan.wang@intel.com> Closes #5877 from adrian-wang/jdbcrdd and squashes the following commits: cc11900 [Daoyuan Wang] avoid NPE in jdbcrdd --- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 8 +++++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 25 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 325a326e2b5b2..1a5083dbe0f61 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -362,7 +362,13 @@ private[sql] class JDBCRDD( conversions(i) match { case BooleanConversion => mutableRow.setBoolean(i, rs.getBoolean(pos)) case DateConversion => - mutableRow.update(i, DateUtils.fromJavaDate(rs.getDate(pos))) + // DateUtils.fromJavaDate does not handle null value, so we need to check it. + val dateVal = rs.getDate(pos) + if (dateVal != null) { + mutableRow.update(i, DateUtils.fromJavaDate(dateVal)) + } else { + mutableRow.update(i, null) + } case DecimalConversion => val decimalVal = rs.getBigDecimal(pos) if (decimalVal == null) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 3ec17d37c025b..021affafe36a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -104,6 +104,8 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { ).executeUpdate() conn.prepareStatement("insert into test.timetypes values ('12:34:56', " + "'1996-01-01', '2002-02-20 11:22:33.543543543')").executeUpdate() + conn.prepareStatement("insert into test.timetypes values ('12:34:56', " + + "null, '2002-02-20 11:22:33.543543543')").executeUpdate() conn.commit() sql( s""" @@ -127,6 +129,23 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { |OPTIONS (url '$url', dbtable 'TEST.FLTTYPES', user 'testUser', password 'testPass') """.stripMargin.replaceAll("\n", " ")) + conn.prepareStatement( + s""" + |create table test.nulltypes (a INT, b BOOLEAN, c TINYINT, d BINARY(20), e VARCHAR(20), + |f VARCHAR_IGNORECASE(20), g CHAR(20), h BLOB, i CLOB, j TIME, k DATE, l TIMESTAMP, + |m DOUBLE, n REAL, o DECIMAL(40, 20)) + """.stripMargin.replaceAll("\n", " ")).executeUpdate() + conn.prepareStatement("insert into test.nulltypes values (" + + "null, null, null, null, null, null, null, null, null, " + + "null, null, null, null, null, null)").executeUpdate() + conn.commit() + sql( + s""" + |CREATE TEMPORARY TABLE nulltypes + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url', dbtable 'TEST.NULLTYPES', user 'testUser', password 'testPass') + """.stripMargin.replaceAll("\n", " ")) + // Untested: IDENTITY, OTHER, UUID, ARRAY, and GEOMETRY types. } @@ -254,6 +273,7 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { val rows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.TIMETYPES").collect() val cachedRows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.TIMETYPES").cache().collect() assert(rows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) + assert(rows(1).getAs[java.sql.Date](1) === null) assert(cachedRows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) } @@ -266,6 +286,11 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { assert(cachedRows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) } + test("test types for null value") { + val rows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.NULLTYPES").collect() + assert((0 to 14).forall(i => rows(0).isNullAt(i))) + } + test("H2 floating-point types") { val rows = sql("SELECT * FROM flttypes").collect() assert(rows(0).getDouble(0) === 1.00000000000000022) // Yes, I meant ==. 
From 9e2ffb13287e6efe256b8d23a4654e4cc305e20b Mon Sep 17 00:00:00 2001 From: Burak Yavuz <brkyvz@gmail.com> Date: Thu, 7 May 2015 10:25:41 -0700 Subject: [PATCH 008/320] [SPARK-7388] [SPARK-7383] wrapper for VectorAssembler in Python The wrapper required the implementation of the `ArrayParam`, because `Array[T]` is hard to obtain from Python. `ArrayParam` has an extra function called `wCast` which is an internal function to obtain `Array[T]` from `Seq[T]` Author: Burak Yavuz <brkyvz@gmail.com> Author: Xiangrui Meng <meng@databricks.com> Closes #5930 from brkyvz/ml-feat and squashes the following commits: 73e745f [Burak Yavuz] Merge pull request #3 from mengxr/SPARK-7388 c221db9 [Xiangrui Meng] overload StringArrayParam.w c81072d [Burak Yavuz] addressed comments 99c2ebf [Burak Yavuz] add to python_shared_params 39ecb07 [Burak Yavuz] fix scalastyle 7f7ea2a [Burak Yavuz] [SPARK-7388][SPARK-7383] wrapper for VectorAssembler in Python --- .../spark/ml/feature/VectorAssembler.scala | 2 +- .../org/apache/spark/ml/param/params.scala | 27 ++++++++++-- .../ml/param/shared/SharedParamsCodeGen.scala | 1 + .../spark/ml/param/shared/sharedParams.scala | 2 +- python/pyspark/ml/feature.py | 43 ++++++++++++++++++- .../ml/param/_shared_params_code_gen.py | 1 + python/pyspark/ml/param/shared.py | 29 +++++++++++++ python/pyspark/ml/wrapper.py | 13 +++--- 8 files changed, 105 insertions(+), 13 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 8f2e62a8e2081..b5a69cee6daf3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.types._ /** * :: AlphaComponent :: - * A feature transformer than merge multiple columns into a vector column. + * A feature transformer that merges multiple columns into a vector column. */ @AlphaComponent class VectorAssembler extends Transformer with HasInputCols with HasOutputCol { diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 6d09962fe6ee2..0e1b60d172e3b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -22,6 +22,7 @@ import java.util.NoSuchElementException import scala.annotation.varargs import scala.collection.mutable +import scala.collection.JavaConverters._ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.util.Identifiable @@ -218,6 +219,19 @@ class BooleanParam(parent: Params, name: String, doc: String) // No need for isV override def w(value: Boolean): ParamPair[Boolean] = super.w(value) } +/** Specialized version of [[Param[Array[T]]]] for Java. */ +class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array[String] => Boolean) + extends Param[Array[String]](parent, name, doc, isValid) { + + def this(parent: Params, name: String, doc: String) = + this(parent, name, doc, ParamValidators.alwaysTrue) + + override def w(value: Array[String]): ParamPair[Array[String]] = super.w(value) + + /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */ + def w(value: java.util.List[String]): ParamPair[Array[String]] = w(value.asScala.toArray) +} + /** * A param amd its value. 
*/ @@ -310,9 +324,7 @@ trait Params extends Identifiable with Serializable { * Sets a parameter in the embedded param map. */ protected final def set[T](param: Param[T], value: T): this.type = { - shouldOwn(param) - paramMap.put(param.asInstanceOf[Param[Any]], value) - this + set(param -> value) } /** @@ -322,6 +334,15 @@ trait Params extends Identifiable with Serializable { set(getParam(param), value) } + /** + * Sets a parameter in the embedded param map. + */ + protected final def set(paramPair: ParamPair[_]): this.type = { + shouldOwn(paramPair.param) + paramMap.put(paramPair) + this + } + /** * Optionally returns the user-supplied value of a param. */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 0e1ff97a8bf60..5085b798daa17 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -85,6 +85,7 @@ private[shared] object SharedParamsCodeGen { case _ if c == classOf[Float] => "FloatParam" case _ if c == classOf[Double] => "DoubleParam" case _ if c == classOf[Boolean] => "BooleanParam" + case _ if c.isArray && c.getComponentType == classOf[String] => s"StringArrayParam" case _ => s"Param[${getTypeString(c)}]" } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 87f86807c3c91..7525d37007377 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -178,7 +178,7 @@ private[ml] trait HasInputCols extends Params { * Param for input column names. * @group param */ - final val inputCols: Param[Array[String]] = new Param[Array[String]](this, "inputCols", "input column names") + final val inputCols: StringArrayParam = new StringArrayParam(this, "inputCols", "input column names") /** @group getParam */ final def getInputCols: Array[String] = $(inputCols) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 4e4614b859ac6..8a0fdddd2d9b5 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -16,12 +16,12 @@ # from pyspark.rdd import ignore_unicode_prefix -from pyspark.ml.param.shared import HasInputCol, HasOutputCol, HasNumFeatures +from pyspark.ml.param.shared import HasInputCol, HasInputCols, HasOutputCol, HasNumFeatures from pyspark.ml.util import keyword_only from pyspark.ml.wrapper import JavaTransformer from pyspark.mllib.common import inherit_doc -__all__ = ['Tokenizer', 'HashingTF'] +__all__ = ['Tokenizer', 'HashingTF', 'VectorAssembler'] @inherit_doc @@ -112,6 +112,45 @@ def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None): return self._set(**kwargs) +@inherit_doc +class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): + """ + A feature transformer that merges multiple columns into a vector column. 
+ + >>> from pyspark.sql import Row + >>> df = sc.parallelize([Row(a=1, b=0, c=3)]).toDF() + >>> vecAssembler = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features") + >>> vecAssembler.transform(df).head().features + SparseVector(3, {0: 1.0, 2: 3.0}) + >>> vecAssembler.setParams(outputCol="freqs").transform(df).head().freqs + SparseVector(3, {0: 1.0, 2: 3.0}) + >>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"} + >>> vecAssembler.transform(df, params).head().vector + SparseVector(2, {1: 1.0}) + """ + + _java_class = "org.apache.spark.ml.feature.VectorAssembler" + + @keyword_only + def __init__(self, inputCols=None, outputCol=None): + """ + __init__(self, inputCols=None, outputCol=None) + """ + super(VectorAssembler, self).__init__() + self._setDefault() + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, inputCols=None, outputCol=None): + """ + setParams(self, inputCols=None, outputCol=None) + Sets params for this VectorAssembler. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + if __name__ == "__main__": import doctest from pyspark.context import SparkContext diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index c71c823db2c81..c1c8e921dda87 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -95,6 +95,7 @@ def get$Name(self): ("predictionCol", "prediction column name", "'prediction'"), ("rawPredictionCol", "raw prediction column name", "'rawPrediction'"), ("inputCol", "input column name", None), + ("inputCols", "input column names", None), ("outputCol", "output column name", None), ("numFeatures", "number of features", None)] code = [] diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 4f243844f8caa..aaf80f00085bf 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -223,6 +223,35 @@ def getInputCol(self): return self.getOrDefault(self.inputCol) +class HasInputCols(Params): + """ + Mixin for param inputCols: input column names. + """ + + # a placeholder to make it appear in the generated doc + inputCols = Param(Params._dummy(), "inputCols", "input column names") + + def __init__(self): + super(HasInputCols, self).__init__() + #: param for input column names + self.inputCols = Param(self, "inputCols", "input column names") + if None is not None: + self._setDefault(inputCols=None) + + def setInputCols(self, value): + """ + Sets the value of :py:attr:`inputCols`. + """ + self.paramMap[self.inputCols] = value + return self + + def getInputCols(self): + """ + Gets the value of inputCols or its default value. + """ + return self.getOrDefault(self.inputCols) + + class HasOutputCol(Params): """ Mixin for param outputCol: output column name. 
diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index 0634254bbd5cf..f5ac2a398642a 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -67,7 +67,9 @@ def _transfer_params_to_java(self, params, java_obj): paramMap = self.extractParamMap(params) for param in self.params: if param in paramMap: - java_obj.set(param.name, paramMap[param]) + value = paramMap[param] + java_param = java_obj.getParam(param.name) + java_obj.set(java_param.w(value)) def _empty_java_param_map(self): """ @@ -79,7 +81,8 @@ def _create_java_param_map(self, params, java_obj): paramMap = self._empty_java_param_map() for param, value in params.items(): if param.parent is self: - paramMap.put(java_obj.getParam(param.name), value) + java_param = java_obj.getParam(param.name) + paramMap.put(java_param.w(value)) return paramMap @@ -126,10 +129,8 @@ class JavaTransformer(Transformer, JavaWrapper): def transform(self, dataset, params={}): java_obj = self._java_obj() - self._transfer_params_to_java({}, java_obj) - java_param_map = self._create_java_param_map(params, java_obj) - return DataFrame(java_obj.transform(dataset._jdf, java_param_map), - dataset.sql_ctx) + self._transfer_params_to_java(params, java_obj) + return DataFrame(java_obj.transform(dataset._jdf), dataset.sql_ctx) @inherit_doc From 068c3158ac0c66e20d90a45e6a2a0b93108e08d5 Mon Sep 17 00:00:00 2001 From: Olivier Girardot <o.girardot@lateral-thoughts.com> Date: Thu, 7 May 2015 10:58:35 -0700 Subject: [PATCH 009/320] [SPARK-7118] [Python] Add the coalesce Spark SQL function available in PySpark This patch adds a proxy call from PySpark to the Spark SQL coalesce function and this patch comes out of a discussion on devspark with rxin This contribution is my original work and i license the work to the project under the project's open source license. Olivier. Author: Olivier Girardot <o.girardot@lateral-thoughts.com> Closes #5698 from ogirardot/master and squashes the following commits: d9a4439 [Olivier Girardot] SPARK-7118 Add the coalesce Spark SQL function available in PySpark --- python/pyspark/sql/functions.py | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 274c410a1ee9c..38a043a3c59d7 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -37,6 +37,7 @@ 'rand', 'randn', 'sparkPartitionId', + 'coalesce', 'udf'] @@ -167,6 +168,42 @@ def approxCountDistinct(col, rsd=None): return Column(jc) +def coalesce(*cols): + """Returns the first column that is not null. + + >>> cDf = sqlContext.createDataFrame([(None, None), (1, None), (None, 2)], ("a", "b")) + >>> cDf.show() + +----+----+ + | a| b| + +----+----+ + |null|null| + | 1|null| + |null| 2| + +----+----+ + + >>> cDf.select(coalesce(cDf["a"], cDf["b"])).show() + +-------------+ + |Coalesce(a,b)| + +-------------+ + | null| + | 1| + | 2| + +-------------+ + + >>> cDf.select('*', coalesce(cDf["a"], lit(0.0))).show() + +----+----+---------------+ + | a| b|Coalesce(a,0.0)| + +----+----+---------------+ + |null|null| 0.0| + | 1|null| 1.0| + |null| 2| 0.0| + +----+----+---------------+ + """ + sc = SparkContext._active_spark_context + jc = sc._jvm.functions.coalesce(_to_seq(sc, cols, _to_java_column)) + return Column(jc) + + def countDistinct(col, *cols): """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``. 
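Taken together, the PySpark pieces added so far in this series (the bitwise Column operators and bitwiseNOT from PATCH 003, plus coalesce from PATCH 009) can be exercised roughly as follows — a minimal illustrative sketch, assuming an existing SQLContext bound to the name sqlContext:

    from pyspark.sql.functions import bitwiseNOT, coalesce, lit

    df = sqlContext.createDataFrame([(170, 75), (None, 2)], ("a", "b"))
    # column-level bitwise operators on Column
    df.select(df.a.bitwiseAND(df.b), df.a.bitwiseOR(df.b), df.a.bitwiseXOR(df.b)).show()
    # function-style bitwise NOT
    df.select(bitwiseNOT(df.b)).show()
    # first non-null value per row
    df.select(coalesce(df.a, lit(0))).show()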
From 1712a7c7057bf6dd5da8aea1d7fbecdf96ea4b32 Mon Sep 17 00:00:00 2001 From: Yanbo Liang <ybliang8@gmail.com> Date: Thu, 7 May 2015 11:18:32 -0700 Subject: [PATCH 010/320] [SPARK-6093] [MLLIB] Add RegressionMetrics in PySpark/MLlib https://issues.apache.org/jira/browse/SPARK-6093 Author: Yanbo Liang <ybliang8@gmail.com> Closes #5941 from yanboliang/spark-6093 and squashes the following commits: 6934af3 [Yanbo Liang] change to @property aac3bc5 [Yanbo Liang] Add RegressionMetrics in PySpark/MLlib --- .../mllib/evaluation/RegressionMetrics.scala | 9 +++ python/pyspark/mllib/evaluation.py | 78 ++++++++++++++++++- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala index 693117d820580..e577bf87f885e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala @@ -22,6 +22,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, MultivariateOnlineSummarizer} +import org.apache.spark.sql.DataFrame /** * :: Experimental :: @@ -32,6 +33,14 @@ import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Multivariate @Experimental class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging { + /** + * An auxiliary constructor taking a DataFrame. + * @param predictionAndObservations a DataFrame with two double columns: + * prediction and observation + */ + private[mllib] def this(predictionAndObservations: DataFrame) = + this(predictionAndObservations.map(r => (r.getDouble(0), r.getDouble(1)))) + /** * Use MultivariateOnlineSummarizer to calculate summary statistics of observations and errors. */ diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 16cb49cc0cfff..3e11df09da6b1 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -27,9 +27,9 @@ class BinaryClassificationMetrics(JavaModelWrapper): >>> scoreAndLabels = sc.parallelize([ ... (0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)], 2) >>> metrics = BinaryClassificationMetrics(scoreAndLabels) - >>> metrics.areaUnderROC() + >>> metrics.areaUnderROC 0.70... - >>> metrics.areaUnderPR() + >>> metrics.areaUnderPR 0.83... >>> metrics.unpersist() """ @@ -47,6 +47,7 @@ def __init__(self, scoreAndLabels): java_model = java_class(df._jdf) super(BinaryClassificationMetrics, self).__init__(java_model) + @property def areaUnderROC(self): """ Computes the area under the receiver operating characteristic @@ -54,6 +55,7 @@ def areaUnderROC(self): """ return self.call("areaUnderROC") + @property def areaUnderPR(self): """ Computes the area under the precision-recall curve. @@ -67,6 +69,78 @@ def unpersist(self): self.call("unpersist") +class RegressionMetrics(JavaModelWrapper): + """ + Evaluator for regression. + + >>> predictionAndObservations = sc.parallelize([ + ... (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)]) + >>> metrics = RegressionMetrics(predictionAndObservations) + >>> metrics.explainedVariance + 0.95... + >>> metrics.meanAbsoluteError + 0.5... + >>> metrics.meanSquaredError + 0.37... + >>> metrics.rootMeanSquaredError + 0.61... + >>> metrics.r2 + 0.94... 
+ """ + + def __init__(self, predictionAndObservations): + """ + :param predictionAndObservations: an RDD of (prediction, observation) pairs. + """ + sc = predictionAndObservations.ctx + sql_ctx = SQLContext(sc) + df = sql_ctx.createDataFrame(predictionAndObservations, schema=StructType([ + StructField("prediction", DoubleType(), nullable=False), + StructField("observation", DoubleType(), nullable=False)])) + java_class = sc._jvm.org.apache.spark.mllib.evaluation.RegressionMetrics + java_model = java_class(df._jdf) + super(RegressionMetrics, self).__init__(java_model) + + @property + def explainedVariance(self): + """ + Returns the explained variance regression score. + explainedVariance = 1 - variance(y - \hat{y}) / variance(y) + """ + return self.call("explainedVariance") + + @property + def meanAbsoluteError(self): + """ + Returns the mean absolute error, which is a risk function corresponding to the + expected value of the absolute error loss or l1-norm loss. + """ + return self.call("meanAbsoluteError") + + @property + def meanSquaredError(self): + """ + Returns the mean squared error, which is a risk function corresponding to the + expected value of the squared error loss or quadratic loss. + """ + return self.call("meanSquaredError") + + @property + def rootMeanSquaredError(self): + """ + Returns the root mean squared error, which is defined as the square root of + the mean squared error. + """ + return self.call("rootMeanSquaredError") + + @property + def r2(self): + """ + Returns R^2^, the coefficient of determination. + """ + return self.call("r2") + + def _test(): import doctest from pyspark import SparkContext From 5784c8d95561dce432a85401e1510776fdf723a8 Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Thu, 7 May 2015 11:46:49 -0700 Subject: [PATCH 011/320] [SPARK-1442] [SQL] [FOLLOW-UP] Address minor comments in Window Function PR (#5604). Address marmbrus and scwf's comments in #5604. Author: Yin Huai <yhuai@databricks.com> Closes #5945 from yhuai/windowFollowup and squashes the following commits: 0ef879d [Yin Huai] Add collectFirst to TreeNode. 2373968 [Yin Huai] wip 4a16df9 [Yin Huai] Address minor comments for [SPARK-1442]. --- .../sql/catalyst/analysis/Analyzer.scala | 13 +++-- .../spark/sql/catalyst/trees/TreeNode.scala | 13 ++++- .../sql/catalyst/trees/TreeNodeSuite.scala | 50 +++++++++++++++++++ 3 files changed, 68 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7b543b6c2aa42..7e46ad851cdd3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -638,11 +638,10 @@ class Analyzer( def addWindow(windowExpressions: Seq[NamedExpression], child: LogicalPlan): LogicalPlan = { // First, we group window expressions based on their Window Spec. 
val groupedWindowExpression = windowExpressions.groupBy { expr => - val windowExpression = expr.find { - case window: WindowExpression => true - case other => false - }.map(_.asInstanceOf[WindowExpression].windowSpec) - windowExpression.getOrElse( + val windowSpec = expr.collectFirst { + case window: WindowExpression => window.windowSpec + } + windowSpec.getOrElse( failAnalysis(s"$windowExpressions does not have any WindowExpression.")) }.toSeq @@ -685,7 +684,7 @@ class Analyzer( case f @ Filter(condition, a @ Aggregate(groupingExprs, aggregateExprs, child)) if child.resolved && hasWindowFunction(aggregateExprs) && - !a.expressions.exists(!_.resolved) => + a.expressions.forall(_.resolved) => val (windowExpressions, aggregateExpressions) = extract(aggregateExprs) // Create an Aggregate operator to evaluate aggregation functions. val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child) @@ -702,7 +701,7 @@ class Analyzer( // Aggregate without Having clause. case a @ Aggregate(groupingExprs, aggregateExprs, child) if hasWindowFunction(aggregateExprs) && - !a.expressions.exists(!_.resolved) => + a.expressions.forall(_.resolved) => val (windowExpressions, aggregateExpressions) = extract(aggregateExprs) // Create an Aggregate operator to evaluate aggregation functions. val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 4b93f7d31b808..bc2ad34523d2c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -130,6 +130,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { ret } + /** + * Finds and returns the first [[TreeNode]] of the tree for which the given partial function + * is defined (pre-order), and applies the partial function to it. + */ + def collectFirst[B](pf: PartialFunction[BaseType, B]): Option[B] = { + val lifted = pf.lift + lifted(this).orElse { + children.foldLeft(None: Option[B]) { (l, r) => l.orElse(r.collectFirst(pf)) } + } + } + /** * Returns a copy of this node where `f` has been applied to all the nodes children. */ @@ -160,7 +171,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { val remainingNewChildren = newChildren.toBuffer val remainingOldChildren = children.toBuffer val newArgs = productIterator.map { - // This rule is used to handle children is a input argument. + // Handle Seq[TreeNode] in TreeNode parameters. case s: Seq[_] => s.map { case arg: TreeNode[_] if children contains arg => val newChild = remainingNewChildren.remove(0) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 786ddba403f2c..3d10dab5ba34c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -172,4 +172,54 @@ class TreeNodeSuite extends FunSuite { expected = None assert(expected === actual) } + + test("collectFirst") { + val expression = Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4)))) + + // Collect the top node. 
+ { + val actual = expression.collectFirst { + case add: Add => add + } + val expected = + Some(Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4))))) + assert(expected === actual) + } + + // Collect the first children. + { + val actual = expression.collectFirst { + case l @ Literal(1, IntegerType) => l + } + val expected = Some(Literal(1)) + assert(expected === actual) + } + + // Collect an internal node (Subtract). + { + val actual = expression.collectFirst { + case sub: Subtract => sub + } + val expected = Some(Subtract(Literal(3), Literal(4))) + assert(expected === actual) + } + + // Collect a leaf node. + { + val actual = expression.collectFirst { + case l @ Literal(3, IntegerType) => l + } + val expected = Some(Literal(3)) + assert(expected === actual) + } + + // Collect nothing. + { + val actual = expression.collectFirst { + case l @ Literal(100, IntegerType) => l + } + val expected = None + assert(expected === actual) + } + } } From dec8f53719597119034dffbe43b2a9e5fd963083 Mon Sep 17 00:00:00 2001 From: ksonj <kson@siberie.de> Date: Thu, 7 May 2015 12:04:19 -0700 Subject: [PATCH 012/320] [SPARK-7116] [SQL] [PYSPARK] Remove cache() causing memory leak This patch simply removes a `cache()` on an intermediate RDD when evaluating Python UDFs. Author: ksonj <kson@siberie.de> Closes #5973 from ksonj/udf and squashes the following commits: db5b564 [ksonj] removed TODO about cleaning up fe70c54 [ksonj] Remove cache() causing memory leak --- .../scala/org/apache/spark/sql/execution/pythonUdfs.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala index 7a43bfd8bc8d9..58cb1980f217e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala @@ -219,8 +219,8 @@ case class EvaluatePython( /** * :: DeveloperApi :: - * Uses PythonRDD to evaluate a [[PythonUDF]], one partition of tuples at a time. The input - * data is cached and zipped with the result of the udf evaluation. + * Uses PythonRDD to evaluate a [[PythonUDF]], one partition of tuples at a time. + * The input data is zipped with the result of the udf evaluation. */ @DeveloperApi case class BatchPythonEvaluation(udf: PythonUDF, output: Seq[Attribute], child: SparkPlan) @@ -229,8 +229,7 @@ case class BatchPythonEvaluation(udf: PythonUDF, output: Seq[Attribute], child: def children: Seq[SparkPlan] = child :: Nil def execute(): RDD[Row] = { - // TODO: Clean up after ourselves? 
- val childResults = child.execute().map(_.copy()).cache() + val childResults = child.execute().map(_.copy()) val parent = childResults.mapPartitions { iter => val pickle = new Pickler From 074d75d4c8ce7c0296ded8f4c4868a9210320222 Mon Sep 17 00:00:00 2001 From: Cheng Hao <hao.cheng@intel.com> Date: Thu, 7 May 2015 12:09:54 -0700 Subject: [PATCH 013/320] [SPARK-5213] [SQL] Remove the duplicated SparkSQLParser This is a follow up of #5827 to remove the additional `SparkSQLParser` Author: Cheng Hao <hao.cheng@intel.com> Closes #5965 from chenghao-intel/remove_sparksqlparser and squashes the following commits: 509a233 [Cheng Hao] Remove the HiveQlQueryExecution a5f9e3b [Cheng Hao] Remove the duplicated SparkSQLParser --- .../org/apache/spark/sql/hive/HiveQl.scala | 7 ++----- .../apache/spark/sql/hive/test/TestHive.scala | 17 ++++------------- .../sql/hive/execution/HiveComparisonTest.scala | 8 ++++---- .../spark/sql/hive/execution/PruningSuite.scala | 2 +- 4 files changed, 11 insertions(+), 23 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 8a0686a2d81c9..4e51473979a2a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.ql.lib.Node import org.apache.hadoop.hive.ql.metadata.Table import org.apache.hadoop.hive.ql.parse._ import org.apache.hadoop.hive.ql.plan.PlanUtils -import org.apache.spark.sql.{AnalysisException, SparkSQLParser} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ @@ -140,10 +140,7 @@ private[hive] object HiveQl { "TOK_TRUNCATETABLE" // truncate table" is a NativeCommand, does not need to explain. ) ++ nativeCommands - protected val hqlParser = { - val fallback = new ExtendedHiveQlParser - new SparkSQLParser(fallback.parse(_)) - } + protected val hqlParser = new ExtendedHiveQlParser /** * A set of implicit transformations that allow Hive ASTNodes to be rewritten by transformations diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index edeab5158df62..ca84b43a998b8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -94,7 +94,8 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { lazy val hiveDevHome = envVarToFile("HIVE_DEV_HOME") // Override so we can intercept relative paths and rewrite them to point at hive. - override def runSqlHive(sql: String): Seq[String] = super.runSqlHive(rewritePaths(sql)) + override def runSqlHive(sql: String): Seq[String] = + super.runSqlHive(rewritePaths(substitutor.substitute(this.hiveconf, sql))) override def executePlan(plan: LogicalPlan): this.QueryExecution = new this.QueryExecution(plan) @@ -157,22 +158,12 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { val describedTable = "DESCRIBE (\\w+)".r - val vs = new VariableSubstitution() - - // we should substitute variables in hql to pass the text to parseSql() as a parameter. - // Hive parser need substituted text. HiveContext.sql() does this but return a DataFrame, - // while we need a logicalPlan so we cannot reuse that. 
- protected[hive] class HiveQLQueryExecution(hql: String) - extends this.QueryExecution(HiveQl.parseSql(vs.substitute(hiveconf, hql))) { - def hiveExec(): Seq[String] = runSqlHive(hql) - override def toString: String = hql + "\n" + super.toString - } - /** * Override QueryExecution with special debug workflow. */ class QueryExecution(logicalPlan: LogicalPlan) extends super.QueryExecution(logicalPlan) { + def this(sql: String) = this(parseSql(sql)) override lazy val analyzed = { val describedTables = logical match { case HiveNativeCommand(describedTable(tbl)) => tbl :: Nil @@ -196,7 +187,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { protected[hive] implicit class SqlCmd(sql: String) { def cmd: () => Unit = { - () => new HiveQLQueryExecution(sql).stringResult(): Unit + () => new QueryExecution(sql).stringResult(): Unit } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 5ead5f3c19908..a3eacbd4e3981 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -129,7 +129,7 @@ abstract class HiveComparisonTest } protected def prepareAnswer( - hiveQuery: TestHive.type#HiveQLQueryExecution, + hiveQuery: TestHive.type#QueryExecution, answer: Seq[String]): Seq[String] = { def isSorted(plan: LogicalPlan): Boolean = plan match { @@ -298,7 +298,7 @@ abstract class HiveComparisonTest hiveCachedResults } else { - val hiveQueries = queryList.map(new TestHive.HiveQLQueryExecution(_)) + val hiveQueries = queryList.map(new TestHive.QueryExecution(_)) // Make sure we can at least parse everything before attempting hive execution. hiveQueries.foreach(_.logical) val computedResults = (queryList.zipWithIndex, hiveQueries, hiveCacheFiles).zipped.map { @@ -346,7 +346,7 @@ abstract class HiveComparisonTest // Run w/ catalyst val catalystResults = queryList.zip(hiveResults).map { case (queryString, hive) => - val query = new TestHive.HiveQLQueryExecution(queryString) + val query = new TestHive.QueryExecution(queryString) try { (query, prepareAnswer(query, query.stringResult())) } catch { case e: Throwable => val errorMessage = @@ -402,7 +402,7 @@ abstract class HiveComparisonTest // okay by running a simple query. If this fails then we halt testing since // something must have gone seriously wrong. 
try { - new TestHive.HiveQLQueryExecution("SELECT key FROM src").stringResult() + new TestHive.QueryExecution("SELECT key FROM src").stringResult() TestHive.runSqlHive("SELECT key FROM src") } catch { case e: Exception => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala index 067b577f1560e..45f10e2fe64aa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala @@ -145,7 +145,7 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter { expectedScannedColumns: Seq[String], expectedPartValues: Seq[Seq[String]]): Unit = { test(s"$testCaseName - pruning test") { - val plan = new TestHive.HiveQLQueryExecution(sql).executedPlan + val plan = new TestHive.QueryExecution(sql).executedPlan val actualOutputColumns = plan.output.map(_.name) val (actualScannedColumns, actualPartValues) = plan.collect { case p @ HiveTableScan(columns, relation, _) => From 0c33bf817cef457873007959f3539f93f907dccd Mon Sep 17 00:00:00 2001 From: Tijo Thomas <tijoparacka@gmail.com> Date: Thu, 7 May 2015 12:21:09 -0700 Subject: [PATCH 014/320] [SPARK-7399] [SPARK CORE] Fixed compilation error in scala 2.11 scala has deterministic naming-scheme for the generated methods which return default arguments . here one of the default argument of overloaded method has to be removed Author: Tijo Thomas <tijoparacka@gmail.com> Closes #5966 from tijoparacka/fix_compilation_error_in_scala2.11 and squashes the following commits: c90bba8 [Tijo Thomas] Fixed compilation error in scala 2.11 --- .../src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala index 537b56b49f866..9440d456edf15 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala @@ -111,7 +111,7 @@ private[spark] object RDDOperationScope { private[spark] def withScope[T]( sc: SparkContext, name: String, - allowNesting: Boolean = false)(body: => T): T = { + allowNesting: Boolean)(body: => T): T = { // Save the old scope to restore it later val scopeKey = SparkContext.RDD_SCOPE_KEY val noOverrideKey = SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY From 4eecf550aa7e4fb448baca82281bfd4e8bc4a778 Mon Sep 17 00:00:00 2001 From: Timothy Chen <tnachen@gmail.com> Date: Thu, 7 May 2015 12:23:16 -0700 Subject: [PATCH 015/320] [SPARK-7373] [MESOS] Add docker support for launching drivers in mesos cluster mode. Using the existing docker support for mesos, also enabling the mesos cluster mode scheduler to launch Spark drivers in docker images as well. This also allows the executors launched by the drivers to be also in the same Docker image by passing the docker settings. Author: Timothy Chen <tnachen@gmail.com> Closes #5917 from tnachen/spark_cluster_docker and squashes the following commits: 1e842f5 [Timothy Chen] Add docker support for launching drivers in mesos cluster mode. 
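For illustration only, not part of this patch: a driver submitted in Mesos cluster mode picks up the same docker-related properties that MesosClusterScheduler reads in the diff below. A minimal sketch of such a submission configuration, where only the property keys come from the scheduler code and the image name, volume spec, and port mapping are placeholders:

import org.apache.spark.SparkConf

// Hypothetical values; the keys match those read by MesosClusterScheduler below.
val conf = new SparkConf()
  .set("spark.mesos.executor.docker.image", "example/spark:latest")            // enables docker for the driver
  .set("spark.mesos.executor.docker.volumes", "/host/logs:/container/logs:rw") // optional volume spec
  .set("spark.mesos.executor.docker.portmaps", "4040:4040:tcp")                // optional port mapping
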
--- .../cluster/mesos/MesosClusterScheduler.scala | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index 06f0e2881c344..1067a7f1caf4c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -370,16 +370,21 @@ private[spark] class MesosClusterScheduler( val executorOpts = desc.schedulerProperties.map { case (k, v) => s"-D$k=$v" }.mkString(" ") envBuilder.addVariables( Variable.newBuilder().setName("SPARK_EXECUTOR_OPTS").setValue(executorOpts)) - val cmdOptions = generateCmdOption(desc) + val cmdOptions = generateCmdOption(desc).mkString(" ") + val dockerDefined = desc.schedulerProperties.contains("spark.mesos.executor.docker.image") val executorUri = desc.schedulerProperties.get("spark.executor.uri") .orElse(desc.command.environment.get("SPARK_EXECUTOR_URI")) val appArguments = desc.command.arguments.mkString(" ") - val cmd = if (executorUri.isDefined) { + val (executable, jar) = if (dockerDefined) { + // Application jar is automatically downloaded in the mounted sandbox by Mesos, + // and the path to the mounted volume is stored in $MESOS_SANDBOX env variable. + ("./bin/spark-submit", s"$$MESOS_SANDBOX/${desc.jarUrl.split("/").last}") + } else if (executorUri.isDefined) { builder.addUris(CommandInfo.URI.newBuilder().setValue(executorUri.get).build()) val folderBasename = executorUri.get.split('/').last.split('.').head val cmdExecutable = s"cd $folderBasename*; $prefixEnv bin/spark-submit" val cmdJar = s"../${desc.jarUrl.split("/").last}" - s"$cmdExecutable ${cmdOptions.mkString(" ")} $cmdJar $appArguments" + (cmdExecutable, cmdJar) } else { val executorSparkHome = desc.schedulerProperties.get("spark.mesos.executor.home") .orElse(conf.getOption("spark.home")) @@ -389,9 +394,9 @@ private[spark] class MesosClusterScheduler( } val cmdExecutable = new File(executorSparkHome, "./bin/spark-submit").getCanonicalPath val cmdJar = desc.jarUrl.split("/").last - s"$cmdExecutable ${cmdOptions.mkString(" ")} $cmdJar $appArguments" + (cmdExecutable, cmdJar) } - builder.setValue(cmd) + builder.setValue(s"$executable $cmdOptions $jar $appArguments") builder.setEnvironment(envBuilder.build()) builder.build() } @@ -458,9 +463,20 @@ private[spark] class MesosClusterScheduler( .setCommand(commandInfo) .addResources(cpuResource) .addResources(memResource) - .build() + submission.schedulerProperties.get("spark.mesos.executor.docker.image").foreach { image => + val container = taskInfo.getContainerBuilder() + val volumes = submission.schedulerProperties + .get("spark.mesos.executor.docker.volumes") + .map(MesosSchedulerBackendUtil.parseVolumesSpec) + val portmaps = submission.schedulerProperties + .get("spark.mesos.executor.docker.portmaps") + .map(MesosSchedulerBackendUtil.parsePortMappingsSpec) + MesosSchedulerBackendUtil.addDockerInfo( + container, image, volumes = volumes, portmaps = portmaps) + taskInfo.setContainer(container.build()) + } val queuedTasks = tasks.getOrElseUpdate(offer.offer.getId, new ArrayBuffer[TaskInfo]) - queuedTasks += taskInfo + queuedTasks += taskInfo.build() logTrace(s"Using offer ${offer.offer.getId.getValue} to launch driver " + submission.submissionId) val newState = new MesosClusterSubmissionState(submission, taskId, 
offer.offer.getSlaveId, From f1216514b830eadcdfff6fca044afd6ae1585800 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Thu, 7 May 2015 12:29:18 -0700 Subject: [PATCH 016/320] [SPARK-7391] DAG visualization: auto expand if linked from another viz This is an addition to #5729. If you click into a stage from the DAG viz on the job page, you might expect to expand on the stage. However, once you get to the stage page, you actually have to expand the DAG viz there yourself. This patch makes this happen automatically. It's a small UX improvement. Author: Andrew Or <andrew@databricks.com> Closes #5958 from andrewor14/viz-auto-expand and squashes the following commits: 03cd157 [Andrew Or] Automatically expand DAG viz if from job page --- .../org/apache/spark/ui/static/spark-dag-viz.js | 4 ++-- .../src/main/scala/org/apache/spark/ui/UIUtils.scala | 10 +++++++++- .../scala/org/apache/spark/ui/jobs/StagePage.scala | 12 ++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index 76eb2c464139f..cda27cad7ee9d 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -178,8 +178,8 @@ function renderDagVizForJob(svgContainer) { var stageId = metadata.attr("stage-id"); var containerId = VizConstants.graphPrefix + stageId; // Link each graph to the corresponding stage page (TODO: handle stage attempts) - var stageLink = - "/stages/stage/?id=" + stageId.replace(VizConstants.stagePrefix, "") + "&attempt=0"; + var stageLink = "/stages/stage/?id=" + + stageId.replace(VizConstants.stagePrefix, "") + "&attempt=0&expandDagViz=true"; var container = svgContainer .append("a") .attr("xlink:href", stageLink) diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index e2d03f8342315..97eed13c2d780 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -20,7 +20,7 @@ package org.apache.spark.ui import java.text.SimpleDateFormat import java.util.{Locale, Date} -import scala.xml.{Node, Text} +import scala.xml.{Node, Text, Unparsed} import org.apache.spark.Logging import org.apache.spark.ui.scope.RDDOperationGraph @@ -371,4 +371,12 @@ private[spark] object UIUtils extends Logging { </div> </div> } + + /** Return a script element that automatically expands the DAG visualization on page load. 
*/ + def expandDagVizOnLoad(forJob: Boolean): Seq[Node] = { + <script type="text/javascript"> + {Unparsed("$(document).ready(function() { toggleDagViz(" + forJob + ") });")} + </script> + } + } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 579310070c76c..6c4305873cbd9 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -44,6 +44,10 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val parameterAttempt = request.getParameter("attempt") require(parameterAttempt != null && parameterAttempt.nonEmpty, "Missing attempt parameter") + // If this is set, expand the dag visualization by default + val expandDagVizParam = request.getParameter("expandDagViz") + val expandDagViz = expandDagVizParam != null && expandDagVizParam.toBoolean + val stageId = parameterId.toInt val stageAttemptId = parameterAttempt.toInt val stageDataOption = progressListener.stageIdToData.get((stageId, stageAttemptId)) @@ -174,6 +178,13 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val dagViz = UIUtils.showDagVizForStage( stageId, operationGraphListener.getOperationGraphForStage(stageId)) + val maybeExpandDagViz: Seq[Node] = + if (expandDagViz) { + UIUtils.expandDagVizOnLoad(forJob = false) + } else { + Seq.empty + } + val accumulableHeaders: Seq[String] = Seq("Accumulable", "Value") def accumulableRow(acc: AccumulableInfo): Elem = <tr><td>{acc.name}</td><td>{acc.value}</td></tr> @@ -440,6 +451,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { summary ++ showAdditionalMetrics ++ dagViz ++ + maybeExpandDagViz ++ <h4>Summary Metrics for {numCompleted} Completed Tasks</h4> ++ <div>{summaryTable.getOrElse("No tasks have reported metrics yet.")}</div> ++ <h4>Aggregated Metrics by Executor</h4> ++ executorTable.toNodeSeq ++ From 88717ee4e7542ac8d5d2e5756c912dd390b37e88 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Thu, 7 May 2015 12:29:56 -0700 Subject: [PATCH 017/320] [SPARK-7347] DAG visualization: add tooltips to RDDs This is an addition to #5729. Here's an example with ALS. <img src="https://issues.apache.org/jira/secure/attachment/12731039/tooltip.png" width="400px"></img> Author: Andrew Or <andrew@databricks.com> Closes #5957 from andrewor14/viz-hover2 and squashes the following commits: 60e3758 [Andrew Or] Add tooltips for RDDs on job page --- .../org/apache/spark/ui/static/spark-dag-viz.js | 16 ++++++++++++++++ .../spark/ui/scope/RDDOperationGraph.scala | 5 +++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index cda27cad7ee9d..a0e3e914c2547 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -218,6 +218,7 @@ function renderDagVizForJob(svgContainer) { }); }); + addTooltipsForRDDs(svgContainer); drawCrossStageEdges(crossStageEdges, svgContainer); } @@ -424,6 +425,21 @@ function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) { edgesContainer.append("path").datum(points).attr("d", line); } +/* (Job page only) Helper function to add tooltips for RDDs. 
*/ +function addTooltipsForRDDs(svgContainer) { + svgContainer.selectAll("g.node").each(function() { + var node = d3.select(this); + var tooltipText = node.attr("name"); + if (tooltipText) { + node.select("circle") + .attr("data-toggle", "tooltip") + .attr("data-placement", "right") + .attr("title", tooltipText) + } + }); + $("[data-toggle=tooltip]").tooltip({container: "body"}); +} + /* Helper function to convert attributes to numeric values. */ function toFloat(f) { if (f) { diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala index edf005f7f325a..2b2db9e62be4e 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala @@ -178,10 +178,11 @@ private[ui] object RDDOperationGraph extends Logging { * On the stage page, it is displayed as a box with an embedded label. */ private def makeDotNode(node: RDDOperationNode, forJob: Boolean): String = { + val label = s"${node.name} (${node.id})" if (forJob) { - s"""${node.id} [label=" " shape="circle" padding="5" labelStyle="font-size: 0"]""" + s"""${node.id} [label="$label" shape="circle" padding="5" labelStyle="font-size: 0"]""" } else { - s"""${node.id} [label="${node.name} (${node.id})" padding="5" labelStyle="font-size: 10"]""" + s"""${node.id} [label="$label" padding="5" labelStyle="font-size: 10"]""" } } From 347a329a36c94ff37363e4dffcbd5a24dc6a6714 Mon Sep 17 00:00:00 2001 From: MechCoder <manojkumarsivaraj334@gmail.com> Date: Thu, 7 May 2015 14:02:05 -0700 Subject: [PATCH 018/320] [SPARK-7328] [MLLIB] [PYSPARK] Pyspark.mllib.linalg.Vectors: Missing items Add 1. Class methods squared_dist 3. parse 4. norm 5. numNonzeros 6. copy I made a few vectorizations wrt squared_dist and dot as well. I have added support for SparseMatrix serialization in a separate PR (https://github.com/apache/spark/pull/5775) and plan to complete support for Matrices in another PR. Author: MechCoder <manojkumarsivaraj334@gmail.com> Closes #5872 from MechCoder/local_linalg_api and squashes the following commits: a8ff1e0 [MechCoder] minor ce3e53e [MechCoder] Add error message for parser 1bd3c04 [MechCoder] Robust parser and removed unnecessary methods f779561 [MechCoder] [SPARK-7328] Pyspark.mllib.linalg.Vectors: Missing items --- python/pyspark/mllib/linalg.py | 148 ++++++++++++++++++++++++++++++++- python/pyspark/mllib/tests.py | 25 +++++- 2 files changed, 171 insertions(+), 2 deletions(-) diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index 9f3b0baf9f19f..23d1a79ffe511 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -208,9 +208,46 @@ def __init__(self, ar): ar = ar.astype(np.float64) self.array = ar + @staticmethod + def parse(s): + """ + Parse string representation back into the DenseVector. 
+ + >>> DenseVector.parse(' [ 0.0,1.0,2.0, 3.0]') + DenseVector([0.0, 1.0, 2.0, 3.0]) + """ + start = s.find('[') + if start == -1: + raise ValueError("Array should start with '['.") + end = s.find(']') + if end == -1: + raise ValueError("Array should end with ']'.") + s = s[start + 1: end] + + try: + values = [float(val) for val in s.split(',')] + except ValueError: + raise ValueError("Unable to parse values from %s" % s) + return DenseVector(values) + def __reduce__(self): return DenseVector, (self.array.tostring(),) + def numNonzeros(self): + return np.count_nonzero(self.array) + + def norm(self, p): + """ + Calculte the norm of a DenseVector. + + >>> a = DenseVector([0, -1, 2, -3]) + >>> a.norm(2) + 3.7... + >>> a.norm(1) + 6.0 + """ + return np.linalg.norm(self.array, p) + def dot(self, other): """ Compute the dot product of two Vectors. We support @@ -387,8 +424,74 @@ def __init__(self, size, *args): if self.indices[i] >= self.indices[i + 1]: raise TypeError("indices array must be sorted") + def numNonzeros(self): + return np.count_nonzero(self.values) + + def norm(self, p): + """ + Calculte the norm of a SparseVector. + + >>> a = SparseVector(4, [0, 1], [3., -4.]) + >>> a.norm(1) + 7.0 + >>> a.norm(2) + 5.0 + """ + return np.linalg.norm(self.values, p) + def __reduce__(self): - return (SparseVector, (self.size, self.indices.tostring(), self.values.tostring())) + return ( + SparseVector, + (self.size, self.indices.tostring(), self.values.tostring())) + + @staticmethod + def parse(s): + """ + Parse string representation back into the DenseVector. + + >>> SparseVector.parse(' (4, [0,1 ],[ 4.0,5.0] )') + SparseVector(4, {0: 4.0, 1: 5.0}) + """ + start = s.find('(') + if start == -1: + raise ValueError("Tuple should start with '('") + end = s.find(')') + if start == -1: + raise ValueError("Tuple should end with ')'") + s = s[start + 1: end].strip() + + size = s[: s.find(',')] + try: + size = int(size) + except ValueError: + raise ValueError("Cannot parse size %s." % size) + + ind_start = s.find('[') + if ind_start == -1: + raise ValueError("Indices array should start with '['.") + ind_end = s.find(']') + if ind_end == -1: + raise ValueError("Indices array should end with ']'") + new_s = s[ind_start + 1: ind_end] + ind_list = new_s.split(',') + try: + indices = [int(ind) for ind in ind_list] + except ValueError: + raise ValueError("Unable to parse indices from %s." % new_s) + s = s[ind_end + 1:].strip() + + val_start = s.find('[') + if val_start == -1: + raise ValueError("Values array should start with '['.") + val_end = s.find(']') + if val_end == -1: + raise ValueError("Values array should end with ']'.") + val_list = s[val_start + 1: val_end].split(',') + try: + values = [float(val) for val in val_list] + except ValueError: + raise ValueError("Unable to parse values from %s." % s) + return SparseVector(size, indices, values) def dot(self, other): """ @@ -633,6 +736,49 @@ def stringify(vector): """ return str(vector) + @staticmethod + def squared_distance(v1, v2): + """ + Squared distance between two vectors. + a and b can be of type SparseVector, DenseVector, np.ndarray + or array.array. + + >>> a = Vectors.sparse(4, [(0, 1), (3, 4)]) + >>> b = Vectors.dense([2, 5, 4, 1]) + >>> a.squared_distance(b) + 51.0 + """ + v1, v2 = _convert_to_vector(v1), _convert_to_vector(v2) + return v1.squared_distance(v2) + + @staticmethod + def norm(vector, p): + """ + Find norm of the given vector. 
+ """ + return _convert_to_vector(vector).norm(p) + + @staticmethod + def parse(s): + """Parse a string representation back into the Vector. + + >>> Vectors.parse('[2,1,2 ]') + DenseVector([2.0, 1.0, 2.0]) + >>> Vectors.parse(' ( 100, [0], [2])') + SparseVector(100, {0: 2.0}) + """ + if s.find('(') == -1 and s.find('[') != -1: + return DenseVector.parse(s) + elif s.find('(') != -1: + return SparseVector.parse(s) + else: + raise ValueError( + "Cannot find tokens '[' or '(' from the input string.") + + @staticmethod + def zeros(size): + return DenseVector(np.zeros(size)) + class Matrix(object): """ diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index d05cfe2af04b2..36a4c7a5408c6 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -24,7 +24,7 @@ import tempfile import array as pyarray -from numpy import array, array_equal, zeros +from numpy import array, array_equal, zeros, inf from py4j.protocol import Py4JJavaError if sys.version_info[:2] <= (2, 6): @@ -220,6 +220,29 @@ def test_dense_matrix_is_transposed(self): self.assertTrue(array_equal(sm.colPtrs, [0, 2, 5])) self.assertTrue(array_equal(sm.values, [1, 3, 4, 6, 9])) + def test_parse_vector(self): + a = DenseVector([3, 4, 6, 7]) + self.assertTrue(str(a), '[3.0,4.0,6.0,7.0]') + self.assertTrue(Vectors.parse(str(a)), a) + a = SparseVector(4, [0, 2], [3, 4]) + self.assertTrue(str(a), '(4,[0,2],[3.0,4.0])') + self.assertTrue(Vectors.parse(str(a)), a) + a = SparseVector(10, [0, 1], [4, 5]) + self.assertTrue(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), a) + + def test_norms(self): + a = DenseVector([0, 2, 3, -1]) + self.assertAlmostEqual(a.norm(2), 3.742, 3) + self.assertTrue(a.norm(1), 6) + self.assertTrue(a.norm(inf), 3) + a = SparseVector(4, [0, 2], [3, -4]) + self.assertAlmostEqual(a.norm(2), 5) + self.assertTrue(a.norm(1), 7) + self.assertTrue(a.norm(inf), 4) + + tmp = SparseVector(4, [0, 2], [3, 0]) + self.assertEqual(tmp.numNonzeros(), 1) + class ListTests(MLlibTestCase): From 658a478d3f86456df09d0fbb1ba438fb36d8725c Mon Sep 17 00:00:00 2001 From: Octavian Geagla <ogeagla@gmail.com> Date: Thu, 7 May 2015 14:49:55 -0700 Subject: [PATCH 019/320] [SPARK-5726] [MLLIB] Elementwise (Hadamard) Vector Product Transformer See https://issues.apache.org/jira/browse/SPARK-5726 Author: Octavian Geagla <ogeagla@gmail.com> Author: Joseph K. Bradley <joseph@databricks.com> Closes #4580 from ogeagla/spark-mllib-weighting and squashes the following commits: fac12ad [Octavian Geagla] [SPARK-5726] [MLLIB] Use new createTransformFunc. 90f7e39 [Joseph K. Bradley] small cleanups 4595165 [Octavian Geagla] [SPARK-5726] [MLLIB] Remove erroneous test case. ded3ac6 [Octavian Geagla] [SPARK-5726] [MLLIB] Pass style checks. 37d4705 [Octavian Geagla] [SPARK-5726] [MLLIB] Incorporated feedback. 1dffeee [Octavian Geagla] [SPARK-5726] [MLLIB] Pass style checks. 
e436896 [Octavian Geagla] [SPARK-5726] [MLLIB] Remove 'TF' from 'ElementwiseProductTF' cb520e6 [Octavian Geagla] [SPARK-5726] [MLLIB] Rename HadamardProduct to ElementwiseProduct 4922722 [Octavian Geagla] [SPARK-5726] [MLLIB] Hadamard Vector Product Transformer --- docs/mllib-feature-extraction.md | 54 ++++++++++++++++ .../spark/ml/feature/ElementwiseProduct.scala | 55 ++++++++++++++++ .../mllib/feature/ElementwiseProduct.scala | 64 +++++++++++++++++++ .../feature/ElementwiseProductSuite.scala | 61 ++++++++++++++++++ 4 files changed, 234 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md index 80842b27effd8..03fedd01016b9 100644 --- a/docs/mllib-feature-extraction.md +++ b/docs/mllib-feature-extraction.md @@ -477,3 +477,57 @@ sc.stop(); </div> </div> +## ElementwiseProduct + +ElementwiseProduct multiplies each input vector by a provided "weight" vector, using element-wise multiplication. In other words, it scales each column of the dataset by a scalar multiplier. This represents the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_%28matrices%29) between the input vector, `v` and transforming vector, `w`, to yield a result vector. + +`\[ \begin{pmatrix} +v_1 \\ +\vdots \\ +v_N +\end{pmatrix} \circ \begin{pmatrix} + w_1 \\ + \vdots \\ + w_N + \end{pmatrix} += \begin{pmatrix} + v_1 w_1 \\ + \vdots \\ + v_N w_N + \end{pmatrix} +\]` + +[`ElementwiseProduct`](api/scala/index.html#org.apache.spark.mllib.feature.ElementwiseProduct) has the following parameter in the constructor: + +* `w`: the transforming vector. + +`ElementwiseProduct` implements [`VectorTransformer`](api/scala/index.html#org.apache.spark.mllib.feature.VectorTransformer) which can apply the weighting on a `Vector` to produce a transformed `Vector` or on an `RDD[Vector]` to produce a transformed `RDD[Vector]`. + +### Example + +This example below demonstrates how to load a simple vectors file, extract a set of vectors, then transform those vectors using a transforming vector value. + + +<div class="codetabs"> +<div data-lang="scala"> +{% highlight scala %} +import org.apache.spark.SparkContext._ +import org.apache.spark.mllib.feature.ElementwiseProduct +import org.apache.spark.mllib.linalg.Vectors + +// Load and parse the data: +val data = sc.textFile("data/mllib/kmeans_data.txt") +val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))) + +val transformingVector = Vectors.dense(0.0, 1.0, 2.0) +val transformer = new ElementwiseProduct(transformingVector) + +// Batch transform and per-row transform give the same results: +val transformedData = transformer.transform(parsedData) +val transformedData2 = parsedData.map(x => transformer.transform(x)) + +{% endhighlight %} +</div> +</div> + + diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala new file mode 100644 index 0000000000000..f8b56293e3ccc --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.ml.UnaryTransformer +import org.apache.spark.ml.param.Param +import org.apache.spark.mllib.feature +import org.apache.spark.mllib.linalg.{Vector, VectorUDT} +import org.apache.spark.sql.types.DataType + +/** + * :: AlphaComponent :: + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a + * provided "weight" vector. In other words, it scales each column of the dataset by a scalar + * multiplier. + */ +@AlphaComponent +class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { + + /** + * the vector to multiply with input vectors + * @group param + */ + val scalingVec: Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product") + + /** @group setParam */ + def setScalingVec(value: Vector): this.type = set(scalingVec, value) + + /** @group getParam */ + def getScalingVec: Vector = getOrDefault(scalingVec) + + override protected def createTransformFunc: Vector => Vector = { + require(params.contains(scalingVec), s"transformation requires a weight vector") + val elemScaler = new feature.ElementwiseProduct($(scalingVec)) + elemScaler.transform + } + + override protected def outputDataType: DataType = new VectorUDT() +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala new file mode 100644 index 0000000000000..b0985baf9b278 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.feature + +import org.apache.spark.annotation.Experimental +import org.apache.spark.mllib.linalg._ + +/** + * :: Experimental :: + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a + * provided "weight" vector. 
In other words, it scales each column of the dataset by a scalar + * multiplier. + * @param scalingVector The values used to scale the reference vector's individual components. + */ +@Experimental +class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { + + /** + * Does the hadamard product transformation. + * + * @param vector vector to be transformed. + * @return transformed vector. + */ + override def transform(vector: Vector): Vector = { + require(vector.size == scalingVector.size, + s"vector sizes do not match: Expected ${scalingVector.size} but found ${vector.size}") + vector match { + case dv: DenseVector => + val values: Array[Double] = dv.values.clone() + val dim = scalingVector.size + var i = 0 + while (i < dim) { + values(i) *= scalingVector(i) + i += 1 + } + Vectors.dense(values) + case SparseVector(size, indices, vs) => + val values = vs.clone() + val dim = values.length + var i = 0 + while (i < dim) { + values(i) *= scalingVector(indices(i)) + i += 1 + } + Vectors.sparse(size, indices, values) + case v => throw new IllegalArgumentException("Does not support vector type " + v.getClass) + } + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala new file mode 100644 index 0000000000000..f3a482abda873 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.feature + +import org.scalatest.FunSuite + +import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vectors} +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ + +class ElementwiseProductSuite extends FunSuite with MLlibTestSparkContext { + + test("elementwise (hadamard) product should properly apply vector to dense data set") { + val denseData = Array( + Vectors.dense(1.0, 4.0, 1.9, -9.0) + ) + val scalingVec = Vectors.dense(2.0, 0.5, 0.0, 0.25) + val transformer = new ElementwiseProduct(scalingVec) + val transformedData = transformer.transform(sc.makeRDD(denseData)) + val transformedVecs = transformedData.collect() + val transformedVec = transformedVecs(0) + val expectedVec = Vectors.dense(2.0, 2.0, 0.0, -2.25) + assert(transformedVec ~== expectedVec absTol 1E-5, + s"Expected transformed vector $expectedVec but found $transformedVec") + } + + test("elementwise (hadamard) product should properly apply vector to sparse data set") { + val sparseData = Array( + Vectors.sparse(3, Seq((1, -1.0), (2, -3.0))) + ) + val dataRDD = sc.parallelize(sparseData, 3) + val scalingVec = Vectors.dense(1.0, 0.0, 0.5) + val transformer = new ElementwiseProduct(scalingVec) + val data2 = sparseData.map(transformer.transform) + val data2RDD = transformer.transform(dataRDD) + + assert((sparseData, data2, data2RDD.collect()).zipped.forall { + case (v1: DenseVector, v2: DenseVector, v3: DenseVector) => true + case (v1: SparseVector, v2: SparseVector, v3: SparseVector) => true + case _ => false + }, "The vector type should be preserved after hadamard product") + + assert((data2, data2RDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) + assert(data2(0) ~== Vectors.sparse(3, Seq((1, 0.0), (2, -1.5))) absTol 1E-5) + } +} From e43803b8f477b2c8d28836ac163cb54328d13f1a Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 7 May 2015 15:45:37 -0700 Subject: [PATCH 020/320] [SPARK-6948] [MLLIB] compress vectors in VectorAssembler The compression is based on storage. 
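For intuition, a minimal sketch of the storage-based choice, assuming the Vector.compressed behavior exercised by the new VectorAssembler test below; the literal vectors are illustrative only:

import org.apache.spark.mllib.linalg.Vectors

// Mostly zeros: the sparse encoding needs less storage, so compressed yields a SparseVector.
Vectors.dense(0.0, 0.0, 0.0, 4.0).compressed
// No zeros: the dense encoding needs less storage, so compressed yields a DenseVector.
Vectors.sparse(4, Array(0, 1, 2, 3), Array(1.0, 2.0, 3.0, 4.0)).compressed
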
brkyvz Author: Xiangrui Meng <meng@databricks.com> Closes #5985 from mengxr/SPARK-6948 and squashes the following commits: df56a00 [Xiangrui Meng] update python tests 6d90d45 [Xiangrui Meng] compress vectors in VectorAssembler --- .../org/apache/spark/ml/feature/VectorAssembler.scala | 2 +- .../apache/spark/ml/feature/VectorAssemblerSuite.scala | 10 +++++++++- python/pyspark/ml/feature.py | 6 +++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index b5a69cee6daf3..796758a70ef18 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -102,6 +102,6 @@ object VectorAssembler { case o => throw new SparkException(s"$o of type ${o.getClass.getName} is not supported.") } - Vectors.sparse(cur, indices.result(), values.result()) + Vectors.sparse(cur, indices.result(), values.result()).compressed } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala index 57d0278e03639..0db27607bc274 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import org.scalatest.FunSuite import org.apache.spark.SparkException -import org.apache.spark.mllib.linalg.{Vector, Vectors} +import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.{Row, SQLContext} @@ -48,6 +48,14 @@ class VectorAssemblerSuite extends FunSuite with MLlibTestSparkContext { } } + test("assemble should compress vectors") { + import org.apache.spark.ml.feature.VectorAssembler.assemble + val v1 = assemble(0.0, 0.0, 0.0, Vectors.dense(4.0)) + assert(v1.isInstanceOf[SparseVector]) + val v2 = assemble(1.0, 2.0, 3.0, Vectors.sparse(1, Array(0), Array(4.0))) + assert(v2.isInstanceOf[DenseVector]) + } + test("VectorAssembler") { val df = sqlContext.createDataFrame(Seq( (0, 0.0, Vectors.dense(1.0, 2.0), "a", Vectors.sparse(2, Array(1), Array(3.0)), 10L) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 8a0fdddd2d9b5..705a368192c24 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -121,12 +121,12 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): >>> df = sc.parallelize([Row(a=1, b=0, c=3)]).toDF() >>> vecAssembler = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features") >>> vecAssembler.transform(df).head().features - SparseVector(3, {0: 1.0, 2: 3.0}) + DenseVector([1.0, 0.0, 3.0]) >>> vecAssembler.setParams(outputCol="freqs").transform(df).head().freqs - SparseVector(3, {0: 1.0, 2: 3.0}) + DenseVector([1.0, 0.0, 3.0]) >>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"} >>> vecAssembler.transform(df, params).head().vector - SparseVector(2, {1: 1.0}) + DenseVector([0.0, 1.0]) """ _java_class = "org.apache.spark.ml.feature.VectorAssembler" From 97d1182af63d55abab44521171652c81c56c6af6 Mon Sep 17 00:00:00 2001 From: scwf <wangfei1@huawei.com> Date: Thu, 7 May 2015 16:21:24 -0700 Subject: [PATCH 021/320] [SQL] [MINOR] make star and multialias extend NamedExpression `Star` and `MultiAlias` 
just used in `analyzer` and them will be substituted after analyze, So just like `Alias` they do not need extend `Attribute` Author: scwf <wangfei1@huawei.com> Closes #5928 from scwf/attribute and squashes the following commits: 73a0560 [scwf] star and multialias do not need extend attribute --- .../sql/catalyst/analysis/unresolved.scala | 20 +++++-------------- .../expressions/namedExpressions.scala | 2 +- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 3f567e3e8b2a6..eb736ac3290e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -95,7 +95,7 @@ case class UnresolvedFunction(name: String, children: Seq[Expression]) extends E * Represents all of the input attributes to a given relational operator, for example in * "SELECT * FROM ...". A [[Star]] gets automatically expanded during analysis. */ -trait Star extends Attribute with trees.LeafNode[Expression] { +trait Star extends NamedExpression with trees.LeafNode[Expression] { self: Product => override def name: String = throw new UnresolvedException(this, "name") @@ -103,13 +103,9 @@ trait Star extends Attribute with trees.LeafNode[Expression] { override def dataType: DataType = throw new UnresolvedException(this, "dataType") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers") + override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute") override lazy val resolved = false - override def newInstance(): Star = this - override def withNullability(newNullability: Boolean): Star = this - override def withQualifiers(newQualifiers: Seq[String]): Star = this - override def withName(newName: String): Star = this - // Star gets expanded at runtime so we never evaluate a Star. override def eval(input: Row = null): EvaluatedType = throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}") @@ -154,7 +150,7 @@ case class UnresolvedStar(table: Option[String]) extends Star { * @param names the names to be associated with each output of computing [[child]]. */ case class MultiAlias(child: Expression, names: Seq[String]) - extends Attribute with trees.UnaryNode[Expression] { + extends NamedExpression with trees.UnaryNode[Expression] { override def name: String = throw new UnresolvedException(this, "name") @@ -166,15 +162,9 @@ case class MultiAlias(child: Expression, names: Seq[String]) override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers") - override lazy val resolved = false - - override def newInstance(): MultiAlias = this + override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute") - override def withNullability(newNullability: Boolean): MultiAlias = this - - override def withQualifiers(newQualifiers: Seq[String]): MultiAlias = this - - override def withName(newName: String): MultiAlias = this + override lazy val resolved = false override def eval(input: Row = null): EvaluatedType = throw new TreeNodeException(this, s"No function to evaluate expression. 
type: ${this.nodeName}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 57ace2a14f77c..a9170589f8c6c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -86,7 +86,7 @@ abstract class Attribute extends NamedExpression { def withQualifiers(newQualifiers: Seq[String]): Attribute def withName(newName: String): Attribute - def toAttribute: Attribute = this + override def toAttribute: Attribute = this def newInstance(): Attribute } From ea3077f19c18b5556a3632b36771aeb153746ff5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Thu, 7 May 2015 16:22:45 -0700 Subject: [PATCH 022/320] [SPARK-7277] [SQL] Throw exception if the property mapred.reduce.tasks is set to -1 JIRA: https://issues.apache.org/jira/browse/SPARK-7277 As automatically determining the number of reducers is not supported (`mapred.reduce.tasks` is set to `-1`), we should throw exception to users. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #5811 from viirya/no_neg_reduce_tasks and squashes the following commits: e518f96 [Liang-Chi Hsieh] Consider other wrong setting values. fd9c817 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into no_neg_reduce_tasks 4ede705 [Liang-Chi Hsieh] Throw exception instead of warning message. 68a1c70 [Liang-Chi Hsieh] Show warning message if mapred.reduce.tasks is set to -1. --- .../org/apache/spark/sql/execution/commands.scala | 10 ++++++++-- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 10 ++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 98df5bef34efa..65687db4e6235 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -84,8 +84,14 @@ case class SetCommand( logWarning( s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " + s"automatically converted to ${SQLConf.SHUFFLE_PARTITIONS} instead.") - sqlContext.setConf(SQLConf.SHUFFLE_PARTITIONS, value) - Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$value")) + if (value.toInt < 1) { + val msg = s"Setting negative ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} for automatically " + + "determining the number of reducers is not supported." + throw new IllegalArgumentException(msg) + } else { + sqlContext.setConf(SQLConf.SHUFFLE_PARTITIONS, value) + Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$value")) + } // Configures a single property. 
case Some((key, Some(value))) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 208cec6a32d4d..77be3b8b206c0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -871,6 +871,16 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { conf.clear() } + test("SET commands with illegal or inappropriate argument") { + conf.clear() + // Set negative mapred.reduce.tasks for automatically determing + // the number of reducers is not supported + intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-1")) + intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-01")) + intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-2")) + conf.clear() + } + test("apply schema") { val schema1 = StructType( StructField("f1", IntegerType, false) :: From 937ba798c56770ec54276b9259e47ae65ee93967 Mon Sep 17 00:00:00 2001 From: Iulian Dragos <jaguarul@gmail.com> Date: Thu, 7 May 2015 16:24:11 -0700 Subject: [PATCH 023/320] [SPARK-5281] [SQL] Registering table on RDD is giving MissingRequirementError Go through the context classloader when reflecting on user types in ScalaReflection. Replaced calls to `typeOf` with `typeTag[T].in(mirror)`. The convenience method assumes all types can be found in the classloader that loaded scala-reflect (the primordial classloader). This assumption is not valid in all contexts (sbt console, Eclipse launchers). Fixed SPARK-5281 Author: Iulian Dragos <jaguarul@gmail.com> Closes #5981 from dragos/issue/mirrors-missing-requirement-error and squashes the following commits: d103e70 [Iulian Dragos] Go through the context classloader when reflecting on user types in ScalaReflection --- .../spark/sql/catalyst/ScalaReflection.scala | 57 ++++++++++++------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index c52965507c715..6998cc8d9666d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.types._ */ object ScalaReflection extends ScalaReflection { val universe: scala.reflect.runtime.universe.type = scala.reflect.runtime.universe + val mirror: universe.Mirror = universe.runtimeMirror(Thread.currentThread().getContextClassLoader) } /** @@ -36,6 +37,9 @@ trait ScalaReflection { /** The universe we work in (runtime or macro) */ val universe: scala.reflect.api.Universe + /** The mirror used to access types in the universe */ + val mirror: universe.Mirror + import universe._ // The Predef.Map is scala.collection.immutable.Map. @@ -52,7 +56,19 @@ trait ScalaReflection { /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. */ def schemaFor[T: TypeTag]: Schema = - ScalaReflectionLock.synchronized { schemaFor(typeOf[T]) } + ScalaReflectionLock.synchronized { schemaFor(localTypeOf[T]) } + + /** + * Return the Scala Type for `T` in the current classloader mirror. + * + * Use this method instead of the convenience method `universe.typeOf`, which + * assumes that all types can be found in the classloader that loaded scala-reflect classes. 
+ * That's not necessarily the case when running using Eclipse launchers or even + * Sbt console or test (without `fork := true`). + * + * @see SPARK-5281 + */ + private def localTypeOf[T: TypeTag]: `Type` = typeTag[T].in(mirror).tpe /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. */ def schemaFor(tpe: `Type`): Schema = ScalaReflectionLock.synchronized { @@ -67,25 +83,25 @@ trait ScalaReflection { val udt = Utils.classForName(className) .getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance() Schema(udt, nullable = true) - case t if t <:< typeOf[Option[_]] => + case t if t <:< localTypeOf[Option[_]] => val TypeRef(_, _, Seq(optType)) = t Schema(schemaFor(optType).dataType, nullable = true) // Need to decide if we actually need a special type here. - case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true) - case t if t <:< typeOf[Array[_]] => + case t if t <:< localTypeOf[Array[Byte]] => Schema(BinaryType, nullable = true) + case t if t <:< localTypeOf[Array[_]] => val TypeRef(_, _, Seq(elementType)) = t val Schema(dataType, nullable) = schemaFor(elementType) Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Seq[_]] => + case t if t <:< localTypeOf[Seq[_]] => val TypeRef(_, _, Seq(elementType)) = t val Schema(dataType, nullable) = schemaFor(elementType) Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Map[_, _]] => + case t if t <:< localTypeOf[Map[_, _]] => val TypeRef(_, _, Seq(keyType, valueType)) = t val Schema(valueDataType, valueNullable) = schemaFor(valueType) Schema(MapType(schemaFor(keyType).dataType, valueDataType, valueContainsNull = valueNullable), nullable = true) - case t if t <:< typeOf[Product] => + case t if t <:< localTypeOf[Product] => val formalTypeArgs = t.typeSymbol.asClass.typeParams val TypeRef(_, _, actualTypeArgs) = t val constructorSymbol = t.member(nme.CONSTRUCTOR) @@ -107,19 +123,20 @@ trait ScalaReflection { schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs)) StructField(p.name.toString, dataType, nullable) }), nullable = true) - case t if t <:< typeOf[String] => Schema(StringType, nullable = true) - case t if t <:< typeOf[java.sql.Timestamp] => Schema(TimestampType, nullable = true) - case t if t <:< typeOf[java.sql.Date] => Schema(DateType, nullable = true) - case t if t <:< typeOf[BigDecimal] => Schema(DecimalType.Unlimited, nullable = true) - case t if t <:< typeOf[java.math.BigDecimal] => Schema(DecimalType.Unlimited, nullable = true) - case t if t <:< typeOf[Decimal] => Schema(DecimalType.Unlimited, nullable = true) - case t if t <:< typeOf[java.lang.Integer] => Schema(IntegerType, nullable = true) - case t if t <:< typeOf[java.lang.Long] => Schema(LongType, nullable = true) - case t if t <:< typeOf[java.lang.Double] => Schema(DoubleType, nullable = true) - case t if t <:< typeOf[java.lang.Float] => Schema(FloatType, nullable = true) - case t if t <:< typeOf[java.lang.Short] => Schema(ShortType, nullable = true) - case t if t <:< typeOf[java.lang.Byte] => Schema(ByteType, nullable = true) - case t if t <:< typeOf[java.lang.Boolean] => Schema(BooleanType, nullable = true) + case t if t <:< localTypeOf[String] => Schema(StringType, nullable = true) + case t if t <:< localTypeOf[java.sql.Timestamp] => Schema(TimestampType, nullable = true) + case t if t <:< localTypeOf[java.sql.Date] => Schema(DateType, nullable = true) + case t if t <:< localTypeOf[BigDecimal] => 
Schema(DecimalType.Unlimited, nullable = true) + case t if t <:< localTypeOf[java.math.BigDecimal] => + Schema(DecimalType.Unlimited, nullable = true) + case t if t <:< localTypeOf[Decimal] => Schema(DecimalType.Unlimited, nullable = true) + case t if t <:< localTypeOf[java.lang.Integer] => Schema(IntegerType, nullable = true) + case t if t <:< localTypeOf[java.lang.Long] => Schema(LongType, nullable = true) + case t if t <:< localTypeOf[java.lang.Double] => Schema(DoubleType, nullable = true) + case t if t <:< localTypeOf[java.lang.Float] => Schema(FloatType, nullable = true) + case t if t <:< localTypeOf[java.lang.Short] => Schema(ShortType, nullable = true) + case t if t <:< localTypeOf[java.lang.Byte] => Schema(ByteType, nullable = true) + case t if t <:< localTypeOf[java.lang.Boolean] => Schema(BooleanType, nullable = true) case t if t <:< definitions.IntTpe => Schema(IntegerType, nullable = false) case t if t <:< definitions.LongTpe => Schema(LongType, nullable = false) case t if t <:< definitions.DoubleTpe => Schema(DoubleType, nullable = false) From 35f0173b8f67e2e506fc4575be6430cfb66e2238 Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Thu, 7 May 2015 16:26:49 -0700 Subject: [PATCH 024/320] [SPARK-2155] [SQL] [WHEN D THEN E] [ELSE F] add CaseKeyWhen for "CASE a WHEN b THEN c * END" Avoid translating to CaseWhen and evaluate the key expression many times. Author: Wenchen Fan <cloud0fan@outlook.com> Closes #5979 from cloud-fan/condition and squashes the following commits: 3ce54e1 [Wenchen Fan] add CaseKeyWhen --- .../apache/spark/sql/catalyst/SqlParser.scala | 10 +- .../catalyst/analysis/HiveTypeCoercion.scala | 43 +++--- .../sql/catalyst/expressions/Expression.scala | 2 +- .../sql/catalyst/expressions/predicates.scala | 135 ++++++++++++------ .../ExpressionEvaluationSuite.scala | 26 ++++ .../spark/sql/DataFrameNaFunctions.scala | 9 +- .../org/apache/spark/sql/hive/HiveQl.scala | 12 +- .../sql/hive/execution/SQLQuerySuite.scala | 7 + 8 files changed, 159 insertions(+), 85 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 1d3a2dc0d9bb0..b06bfb2ce8c8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -296,13 +296,13 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser { | LOWER ~ "(" ~> expression <~ ")" ^^ { case exp => Lower(exp) } | IF ~ "(" ~> expression ~ ("," ~> expression) ~ ("," ~> expression) <~ ")" ^^ { case c ~ t ~ f => If(c, t, f) } - | CASE ~> expression.? ~ (WHEN ~> expression ~ (THEN ~> expression)).* ~ + | CASE ~> expression.? ~ rep1(WHEN ~> expression ~ (THEN ~> expression)) ~ (ELSE ~> expression).? 
<~ END ^^ { case casePart ~ altPart ~ elsePart => - val altExprs = altPart.flatMap { case whenExpr ~ thenExpr => - Seq(casePart.fold(whenExpr)(EqualTo(_, whenExpr)), thenExpr) - } - CaseWhen(altExprs ++ elsePart.toList) + val branches = altPart.flatMap { case whenExpr ~ thenExpr => + Seq(whenExpr, thenExpr) + } ++ elsePart + casePart.map(CaseKeyWhen(_, branches)).getOrElse(CaseWhen(branches)) } | (SUBSTR | SUBSTRING) ~ "(" ~> expression ~ ("," ~> expression) <~ ")" ^^ { case s ~ p => Substring(s, p, Literal(Integer.MAX_VALUE)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 873c75c525c3b..168a4e30eab86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -631,31 +631,24 @@ trait HiveTypeCoercion { import HiveTypeCoercion._ def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case cw @ CaseWhen(branches) if !cw.resolved && !branches.exists(!_.resolved) => - val valueTypes = branches.sliding(2, 2).map { - case Seq(_, value) => value.dataType - case Seq(elseVal) => elseVal.dataType - }.toSeq - - logDebug(s"Input values for null casting ${valueTypes.mkString(",")}") - - if (valueTypes.distinct.size > 1) { - val commonType = valueTypes.reduce { (v1, v2) => - findTightestCommonType(v1, v2) - .getOrElse(sys.error( - s"Types in CASE WHEN must be the same or coercible to a common type: $v1 != $v2")) - } - val transformedBranches = branches.sliding(2, 2).map { - case Seq(cond, value) if value.dataType != commonType => - Seq(cond, Cast(value, commonType)) - case Seq(elseVal) if elseVal.dataType != commonType => - Seq(Cast(elseVal, commonType)) - case s => s - }.reduce(_ ++ _) - CaseWhen(transformedBranches) - } else { - // Types match up. Hopefully some other rule fixes whatever is wrong with resolution. - cw + case cw: CaseWhenLike if !cw.resolved && cw.childrenResolved && !cw.valueTypesEqual => + logDebug(s"Input values for null casting ${cw.valueTypes.mkString(",")}") + val commonType = cw.valueTypes.reduce { (v1, v2) => + findTightestCommonType(v1, v2).getOrElse(sys.error( + s"Types in CASE WHEN must be the same or coercible to a common type: $v1 != $v2")) + } + val transformedBranches = cw.branches.sliding(2, 2).map { + case Seq(when, value) if value.dataType != commonType => + Seq(when, Cast(value, commonType)) + case Seq(elseVal) if elseVal.dataType != commonType => + Seq(Cast(elseVal, commonType)) + case s => s + }.reduce(_ ++ _) + cw match { + case _: CaseWhen => + CaseWhen(transformedBranches) + case CaseKeyWhen(key, _) => + CaseKeyWhen(key, transformedBranches) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 4fd1bc4dd642d..0837a3179d897 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -64,7 +64,7 @@ abstract class Expression extends TreeNode[Expression] { * Returns true if all the children of this expression have been resolved to a specific schema * and false if any still contains any unresolved placeholders. 
*/ - def childrenResolved: Boolean = !children.exists(!_.resolved) + def childrenResolved: Boolean = children.forall(_.resolved) /** * Returns a string representation of this expression that does not have developer centric diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 26c38c56c04f5..50b0f3ee5f93f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -353,79 +353,134 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi override def toString: String = s"if ($predicate) $trueValue else $falseValue" } +trait CaseWhenLike extends Expression { + self: Product => + + type EvaluatedType = Any + + // Note that `branches` are considered in consecutive pairs (cond, val), and the optional last + // element is the value for the default catch-all case (if provided). + // Hence, `branches` consists of at least two elements, and can have an odd or even length. + def branches: Seq[Expression] + + @transient lazy val whenList = + branches.sliding(2, 2).collect { case Seq(whenExpr, _) => whenExpr }.toSeq + @transient lazy val thenList = + branches.sliding(2, 2).collect { case Seq(_, thenExpr) => thenExpr }.toSeq + val elseValue = if (branches.length % 2 == 0) None else Option(branches.last) + + // both then and else val should be considered. + def valueTypes: Seq[DataType] = (thenList ++ elseValue).map(_.dataType) + def valueTypesEqual: Boolean = valueTypes.distinct.size <= 1 + + override def dataType: DataType = { + if (!resolved) { + throw new UnresolvedException(this, "cannot resolve due to differing types in some branches") + } + valueTypes.head + } + + override def nullable: Boolean = { + // If no value is nullable and no elseValue is provided, the whole statement defaults to null. + thenList.exists(_.nullable) || (elseValue.map(_.nullable).getOrElse(true)) + } +} + // scalastyle:off /** * Case statements of the form "CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END". * Refer to this link for the corresponding semantics: * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-ConditionalFunctions - * - * The other form of case statements "CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END" gets - * translated to this form at parsing time. Namely, such a statement gets translated to - * "CASE WHEN a=b THEN c [WHEN a=d THEN e]* [ELSE f] END". - * - * Note that `branches` are considered in consecutive pairs (cond, val), and the optional last - * element is the value for the default catch-all case (if provided). Hence, `branches` consists of - * at least two elements, and can have an odd or even length. */ // scalastyle:on -case class CaseWhen(branches: Seq[Expression]) extends Expression { - type EvaluatedType = Any +case class CaseWhen(branches: Seq[Expression]) extends CaseWhenLike { + + // Use private[this] Array to speed up evaluation. 
+ @transient private[this] lazy val branchesArr = branches.toArray override def children: Seq[Expression] = branches - override def dataType: DataType = { - if (!resolved) { - throw new UnresolvedException(this, "cannot resolve due to differing types in some branches") + override lazy val resolved: Boolean = + childrenResolved && + whenList.forall(_.dataType == BooleanType) && + valueTypesEqual + + /** Written in imperative fashion for performance considerations. */ + override def eval(input: Row): Any = { + val len = branchesArr.length + var i = 0 + // If all branches fail and an elseVal is not provided, the whole statement + // defaults to null, according to Hive's semantics. + while (i < len - 1) { + if (branchesArr(i).eval(input) == true) { + return branchesArr(i + 1).eval(input) + } + i += 2 + } + var res: Any = null + if (i == len - 1) { + res = branchesArr(i).eval(input) } - branches(1).dataType + return res } + override def toString: String = { + "CASE" + branches.sliding(2, 2).map { + case Seq(cond, value) => s" WHEN $cond THEN $value" + case Seq(elseValue) => s" ELSE $elseValue" + }.mkString + } +} + +// scalastyle:off +/** + * Case statements of the form "CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END". + * Refer to this link for the corresponding semantics: + * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-ConditionalFunctions + */ +// scalastyle:on +case class CaseKeyWhen(key: Expression, branches: Seq[Expression]) extends CaseWhenLike { + + // Use private[this] Array to speed up evaluation. @transient private[this] lazy val branchesArr = branches.toArray - @transient private[this] lazy val predicates = - branches.sliding(2, 2).collect { case Seq(cond, _) => cond }.toSeq - @transient private[this] lazy val values = - branches.sliding(2, 2).collect { case Seq(_, value) => value }.toSeq - @transient private[this] lazy val elseValue = - if (branches.length % 2 == 0) None else Option(branches.last) - override def nullable: Boolean = { - // If no value is nullable and no elseValue is provided, the whole statement defaults to null. - values.exists(_.nullable) || (elseValue.map(_.nullable).getOrElse(true)) - } + override def children: Seq[Expression] = key +: branches - override lazy val resolved: Boolean = { - if (!childrenResolved) { - false - } else { - val allCondBooleans = predicates.forall(_.dataType == BooleanType) - // both then and else val should be considered. - val dataTypesEqual = (values ++ elseValue).map(_.dataType).distinct.size <= 1 - allCondBooleans && dataTypesEqual - } - } + override lazy val resolved: Boolean = + childrenResolved && valueTypesEqual /** Written in imperative fashion for performance considerations. */ override def eval(input: Row): Any = { + val evaluatedKey = key.eval(input) val len = branchesArr.length var i = 0 // If all branches fail and an elseVal is not provided, the whole statement // defaults to null, according to Hive's semantics. 
- var res: Any = null while (i < len - 1) { - if (branchesArr(i).eval(input) == true) { - res = branchesArr(i + 1).eval(input) - return res + if (equalNullSafe(evaluatedKey, branchesArr(i).eval(input))) { + return branchesArr(i + 1).eval(input) } i += 2 } + var res: Any = null if (i == len - 1) { res = branchesArr(i).eval(input) } - res + return res + } + + private def equalNullSafe(l: Any, r: Any) = { + if (l == null && r == null) { + true + } else if (l == null || r == null) { + false + } else { + l == r + } } override def toString: String = { - "CASE" + branches.sliding(2, 2).map { + s"CASE $key" + branches.sliding(2, 2).map { case Seq(cond, value) => s" WHEN $cond THEN $value" case Seq(elseValue) => s" ELSE $elseValue" }.mkString diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index faaa55aa5e6f1..88d36d153ceea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -850,6 +850,32 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { assert(CaseWhen(Seq(c2, c4_notNull, c3, c5)).nullable === true) } + test("case key when") { + val row = create_row(null, 1, 2, "a", "b", "c") + val c1 = 'a.int.at(0) + val c2 = 'a.int.at(1) + val c3 = 'a.int.at(2) + val c4 = 'a.string.at(3) + val c5 = 'a.string.at(4) + val c6 = 'a.string.at(5) + + val literalNull = Literal.create(null, BooleanType) + val literalInt = Literal(1) + val literalString = Literal("a") + + checkEvaluation(CaseKeyWhen(c1, Seq(c2, c4, c5)), "b", row) + checkEvaluation(CaseKeyWhen(c1, Seq(c2, c4, literalNull, c5, c6)), "b", row) + checkEvaluation(CaseKeyWhen(c2, Seq(literalInt, c4, c5)), "a", row) + checkEvaluation(CaseKeyWhen(c2, Seq(c1, c4, c5)), "b", row) + checkEvaluation(CaseKeyWhen(c4, Seq(literalString, c2, c3)), 1, row) + checkEvaluation(CaseKeyWhen(c4, Seq(c1, c3, c5, c2, Literal(3))), 3, row) + + checkEvaluation(CaseKeyWhen(literalInt, Seq(c2, c4, c5)), "a", row) + checkEvaluation(CaseKeyWhen(literalString, Seq(c5, c2, c4, c3)), 2, row) + checkEvaluation(CaseKeyWhen(literalInt, Seq(c5, c2, c4, c3)), null, row) + checkEvaluation(CaseKeyWhen(literalNull, Seq(c5, c2, c1, c3)), 2, row) + } + test("complex type") { val row = create_row( "^Ba*n", // 0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 481ed4924857e..4a54120ba86f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -357,11 +357,12 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * TODO: This can be optimized to use broadcast join when replacementMap is large. 
*/ private def replaceCol(col: StructField, replacementMap: Map[_, _]): Column = { - val branches: Seq[Expression] = replacementMap.flatMap { case (source, target) => - df.col(col.name).equalTo(lit(source).cast(col.dataType)).expr :: - lit(target).cast(col.dataType).expr :: Nil + val keyExpr = df.col(col.name).expr + def buildExpr(v: Any) = Cast(Literal(v), keyExpr.dataType) + val branches = replacementMap.flatMap { case (source, target) => + Seq(buildExpr(source), buildExpr(target)) }.toSeq - new Column(CaseWhen(branches ++ Seq(df.col(col.name).expr))).as(col.name) + new Column(CaseKeyWhen(keyExpr, branches :+ keyExpr)).as(col.name) } private def convertToDouble(v: Any): Double = v match { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 4e51473979a2a..6176aee25e7a4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -1246,16 +1246,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) => CaseWhen(branches.map(nodeToExpr)) case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) => - val transformed = branches.drop(1).sliding(2, 2).map { - case Seq(condVal, value) => - // FIXME (SPARK-2155): the key will get evaluated for multiple times in CaseWhen's eval(). - // Hence effectful / non-deterministic key expressions are *not* supported at the moment. - // We should consider adding new Expressions to get around this. - Seq(EqualTo(nodeToExpr(branches(0)), nodeToExpr(condVal)), - nodeToExpr(value)) - case Seq(elseVal) => Seq(nodeToExpr(elseVal)) - }.toSeq.reduce(_ ++ _) - CaseWhen(transformed) + val keyExpr = nodeToExpr(branches.head) + CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr)) /* Complex datatype manipulation */ case Token("[", child :: ordinal :: Nil) => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 616352d223c5b..c605f1017547a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -751,4 +751,11 @@ class SQLQuerySuite extends QueryTest { (6, "c", 0, 6) ).map(i => Row(i._1, i._2, i._3, i._4))) } + + test("test case key when") { + (1 to 5).map(i => (i, i.toString)).toDF("k", "v").registerTempTable("t") + checkAnswer( + sql("SELECT CASE k WHEN 2 THEN 22 WHEN 4 THEN 44 ELSE 0 END, v FROM t"), + Row(0, "1") :: Row(22, "2") :: Row(0, "3") :: Row(44, "4") :: Row(0, "5") :: Nil) + } } From 88063c62689135da56ee6678b2e826b88c382732 Mon Sep 17 00:00:00 2001 From: tedyu <yuzhihong@gmail.com> Date: Thu, 7 May 2015 16:53:59 -0700 Subject: [PATCH 025/320] [SPARK-7450] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() Author: tedyu <yuzhihong@gmail.com> Closes #5897 from tedyu/master and squashes the following commits: 473bf9d [tedyu] Address Josh's review comments 1719c5b [tedyu] Correct upper bound in for loop b51dcaf [tedyu] Add unit test in BitSetSuite for BitSet#anySet() 83f9f87 [tedyu] Merge branch 'master' of github.com:apache/spark 817e3f9 [tedyu] Replace constant 8 with SIZE_OF_LONG 75a467b [tedyu] Correct offset for UNSAFE.getLong() 855374b [tedyu] Remove second loop since bitSetWidthInBytes is WORD aligned 093b7a4 [tedyu] Use 
UNSAFE.getLong() to speed up BitSetMethods#anySet() 63ee050 [tedyu] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() 4ca0ef6 [tedyu] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() 3e9b6919 [tedyu] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() --- .../main/java/org/apache/spark/unsafe/bitset/BitSet.java | 8 ++++++++ .../org/apache/spark/unsafe/bitset/BitSetMethods.java | 7 ++++--- .../java/org/apache/spark/unsafe/bitset/BitSetSuite.java | 7 +++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java index f72e07fce92fd..28e23da108ebe 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java @@ -102,4 +102,12 @@ public boolean isSet(int index) { public int nextSetBit(int fromIndex) { return BitSetMethods.nextSetBit(baseObject, baseOffset, fromIndex, numWords); } + + /** + * Returns {@code true} if any bit is set. + */ + public boolean anySet() { + return BitSetMethods.anySet(baseObject, baseOffset, numWords); + } + } diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index f30626d8f4317..0987191c1c636 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -70,9 +70,10 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { /** * Returns {@code true} if any bit is set. */ - public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { - for (int i = 0; i <= bitSetWidthInBytes; i++) { - if (PlatformDependent.UNSAFE.getByte(baseObject, baseOffset + i) != 0) { + public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInWords) { + long addr = baseOffset; + for (int i = 0; i < bitSetWidthInWords; i++, addr += WORD_SIZE) { + if (PlatformDependent.UNSAFE.getLong(baseObject, addr) != 0) { return true; } } diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java index e3a824e29b768..18393db9f382f 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java @@ -39,6 +39,8 @@ public void basicOps() { for (int i = 0; i < bs.capacity(); i++) { Assert.assertFalse(bs.isSet(i)); } + // another form of asserting that the bit set is empty + Assert.assertFalse(bs.anySet()); // Set every bit and check it. for (int i = 0; i < bs.capacity(); i++) { @@ -52,6 +54,11 @@ public void basicOps() { bs.unset(i); Assert.assertFalse(bs.isSet(i)); } + + // Make sure anySet() can detect any set bit + bs = createBitSet(256); + bs.set(64); + Assert.assertTrue(bs.anySet()); } @Test From 22ab70e06ede65ca865073fe36c859042a920aa3 Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Thu, 7 May 2015 17:34:44 -0700 Subject: [PATCH 026/320] [SPARK-7305] [STREAMING] [WEBUI] Make BatchPage show friendly information when jobs are dropped by SparkListener If jobs are dropped by SparkListener, at least we can show the job ids in BatchPage. 
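As a rough, standalone illustration of the pattern this change relies on (not the actual BatchPage code; JobUIData here is a simplified stand-in for Spark's internal class, and the object and method names below are made up), each Spark job id can be paired with optional UI data so that a placeholder row is rendered when the listener has already dropped the job:

    // Hypothetical sketch: pair each job id with whatever UI data is still tracked,
    // and fall back to "-" style placeholders when the data has been dropped.
    case class JobUIData(jobId: Int, description: String)
    case class SparkJobIdWithUIData(sparkJobId: Int, jobUIData: Option[JobUIData])

    object DroppedJobRowSketch {
      def renderRow(job: SparkJobIdWithUIData): String = job.jobUIData match {
        case Some(data) => s"Job ${job.sparkJobId}: ${data.description}"
        case None       => s"Job ${job.sparkJobId}: - (details dropped by the listener)"
      }

      def main(args: Array[String]): Unit = {
        // Job 3 is still tracked; job 4 has been evicted by the listener.
        val tracked = Map(3 -> JobUIData(3, "count at Example.scala:42"))
        val rows = Seq(3, 4).map(id => SparkJobIdWithUIData(id, tracked.get(id)))
        rows.map(renderRow).foreach(println)
      }
    }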
Screenshot: ![b1](https://cloud.githubusercontent.com/assets/1000778/7434968/f19aa784-eff3-11e4-8f86-36a073873574.png) Author: zsxwing <zsxwing@gmail.com> Closes #5840 from zsxwing/SPARK-7305 and squashes the following commits: aca0ba6 [zsxwing] Fix the code style 718765e [zsxwing] Make generateNormalJobRow private 8073b03 [zsxwing] Merge branch 'master' into SPARK-7305 83dec11 [zsxwing] Make BatchPage show friendly information when jobs are dropped by SparkListener --- .../apache/spark/streaming/ui/BatchPage.scala | 136 ++++++++++++++---- 1 file changed, 106 insertions(+), 30 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala index 3f1cab69068dc..831f60e870f74 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala @@ -19,7 +19,7 @@ package org.apache.spark.streaming.ui import javax.servlet.http.HttpServletRequest -import scala.xml.{NodeSeq, Node} +import scala.xml.{NodeSeq, Node, Text} import org.apache.commons.lang3.StringEscapeUtils @@ -28,6 +28,7 @@ import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage} import org.apache.spark.streaming.ui.StreamingJobProgressListener.{SparkJobId, OutputOpId} import org.apache.spark.ui.jobs.UIData.JobUIData +private case class SparkJobIdWithUIData(sparkJobId: SparkJobId, jobUIData: Option[JobUIData]) private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { private val streamingListener = parent.listener @@ -44,25 +45,33 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { <th>Error</th> } + private def generateJobRow( + outputOpId: OutputOpId, + outputOpDescription: Seq[Node], + formattedOutputOpDuration: String, + numSparkJobRowsInOutputOp: Int, + isFirstRow: Boolean, + sparkJob: SparkJobIdWithUIData): Seq[Node] = { + if (sparkJob.jobUIData.isDefined) { + generateNormalJobRow(outputOpId, outputOpDescription, formattedOutputOpDuration, + numSparkJobRowsInOutputOp, isFirstRow, sparkJob.jobUIData.get) + } else { + generateDroppedJobRow(outputOpId, outputOpDescription, formattedOutputOpDuration, + numSparkJobRowsInOutputOp, isFirstRow, sparkJob.sparkJobId) + } + } + /** * Generate a row for a Spark Job. Because duplicated output op infos needs to be collapsed into * one cell, we use "rowspan" for the first row of a output op. 
*/ - def generateJobRow( + private def generateNormalJobRow( outputOpId: OutputOpId, + outputOpDescription: Seq[Node], formattedOutputOpDuration: String, numSparkJobRowsInOutputOp: Int, isFirstRow: Boolean, sparkJob: JobUIData): Seq[Node] = { - val lastStageInfo = Option(sparkJob.stageIds) - .filter(_.nonEmpty) - .flatMap { ids => sparkListener.stageIdToInfo.get(ids.max) } - val lastStageData = lastStageInfo.flatMap { s => - sparkListener.stageIdToData.get((s.stageId, s.attemptId)) - } - - val lastStageName = lastStageInfo.map(_.name).getOrElse("(Unknown Stage Name)") - val lastStageDescription = lastStageData.flatMap(_.description).getOrElse("") val duration: Option[Long] = { sparkJob.submissionTime.map { start => val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis()) @@ -83,9 +92,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { if (isFirstRow) { <td class="output-op-id-cell" rowspan={numSparkJobRowsInOutputOp.toString}>{outputOpId.toString}</td> <td rowspan={numSparkJobRowsInOutputOp.toString}> - <span class="description-input" title={lastStageDescription}> - {lastStageDescription} - </span>{lastStageName} + {outputOpDescription} </td> <td rowspan={numSparkJobRowsInOutputOp.toString}>{formattedOutputOpDuration}</td> } else { @@ -122,27 +129,96 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { </tr> } - private def generateOutputOpIdRow( - outputOpId: OutputOpId, sparkJobs: Seq[JobUIData]): Seq[Node] = { - val sparkjobDurations = sparkJobs.map(sparkJob => { - sparkJob.submissionTime.map { start => - val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis()) - end - start + /** + * If a job is dropped by sparkListener due to exceeding the limitation, we only show the job id + * with "-" cells. + */ + private def generateDroppedJobRow( + outputOpId: OutputOpId, + outputOpDescription: Seq[Node], + formattedOutputOpDuration: String, + numSparkJobRowsInOutputOp: Int, + isFirstRow: Boolean, + jobId: Int): Seq[Node] = { + // In the first row, output op id and its information needs to be shown. In other rows, these + // cells will be taken up due to "rowspan". + // scalastyle:off + val prefixCells = + if (isFirstRow) { + <td class="output-op-id-cell" rowspan={numSparkJobRowsInOutputOp.toString}>{outputOpId.toString}</td> + <td rowspan={numSparkJobRowsInOutputOp.toString}>{outputOpDescription}</td> + <td rowspan={numSparkJobRowsInOutputOp.toString}>{formattedOutputOpDuration}</td> + } else { + Nil } - }) + // scalastyle:on + + <tr> + {prefixCells} + <td sorttable_customkey={jobId.toString}> + {jobId.toString} + </td> + <!-- Duration --> + <td>-</td> + <!-- Stages: Succeeded/Total --> + <td>-</td> + <!-- Tasks (for all stages): Succeeded/Total --> + <td>-</td> + <!-- Error --> + <td>-</td> + </tr> + } + + private def generateOutputOpIdRow( + outputOpId: OutputOpId, sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = { + // We don't count the durations of dropped jobs + val sparkJobDurations = sparkJobs.filter(_.jobUIData.nonEmpty).map(_.jobUIData.get). 
+ map(sparkJob => { + sparkJob.submissionTime.map { start => + val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis()) + end - start + } + }) val formattedOutputOpDuration = - if (sparkjobDurations.exists(_ == None)) { - // If any job does not finish, set "formattedOutputOpDuration" to "-" + if (sparkJobDurations.isEmpty || sparkJobDurations.exists(_ == None)) { + // If no job or any job does not finish, set "formattedOutputOpDuration" to "-" "-" } else { - SparkUIUtils.formatDuration(sparkjobDurations.flatMap(x => x).sum) + SparkUIUtils.formatDuration(sparkJobDurations.flatMap(x => x).sum) } - generateJobRow(outputOpId, formattedOutputOpDuration, sparkJobs.size, true, sparkJobs.head) ++ + + val description = generateOutputOpDescription(sparkJobs) + + generateJobRow( + outputOpId, description, formattedOutputOpDuration, sparkJobs.size, true, sparkJobs.head) ++ sparkJobs.tail.map { sparkJob => - generateJobRow(outputOpId, formattedOutputOpDuration, sparkJobs.size, false, sparkJob) + generateJobRow( + outputOpId, description, formattedOutputOpDuration, sparkJobs.size, false, sparkJob) }.flatMap(x => x) } + private def generateOutputOpDescription(sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = { + val lastStageInfo = + sparkJobs.flatMap(_.jobUIData).headOption. // Get the first JobUIData + flatMap { sparkJob => // For the first job, get the latest Stage info + if (sparkJob.stageIds.isEmpty) { + None + } else { + sparkListener.stageIdToInfo.get(sparkJob.stageIds.max) + } + } + val lastStageData = lastStageInfo.flatMap { s => + sparkListener.stageIdToData.get((s.stageId, s.attemptId)) + } + + val lastStageName = lastStageInfo.map(_.name).getOrElse("(Unknown Stage Name)") + val lastStageDescription = lastStageData.flatMap(_.description).getOrElse("") + + <span class="description-input" title={lastStageDescription}> + {lastStageDescription} + </span> ++ Text(lastStageName) + } + private def failureReasonCell(failureReason: String): Seq[Node] = { val isMultiline = failureReason.indexOf('\n') >= 0 // Display the first line by default @@ -187,10 +263,10 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { (outputOpId, outputOpIdAndSparkJobIds.map(_.sparkJobId).sorted) } sparkListener.synchronized { - val outputOpIdWithJobs: Seq[(OutputOpId, Seq[JobUIData])] = + val outputOpIdWithJobs: Seq[(OutputOpId, Seq[SparkJobIdWithUIData])] = outputOpIdToSparkJobIds.map { case (outputOpId, sparkJobIds) => - // Filter out spark Job ids that don't exist in sparkListener - (outputOpId, sparkJobIds.flatMap(getJobData)) + (outputOpId, + sparkJobIds.map(sparkJobId => SparkJobIdWithUIData(sparkJobId, getJobData(sparkJobId)))) } <table id="batch-job-table" class="table table-bordered table-striped table-condensed"> @@ -200,7 +276,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { <tbody> { outputOpIdWithJobs.map { - case (outputOpId, jobs) => generateOutputOpIdRow(outputOpId, jobs) + case (outputOpId, sparkJobIds) => generateOutputOpIdRow(outputOpId, sparkJobIds) } } </tbody> From cd1d4110cfffb413ab585cf1cc8f1264243cb393 Mon Sep 17 00:00:00 2001 From: Michael Armbrust <michael@databricks.com> Date: Thu, 7 May 2015 19:36:24 -0700 Subject: [PATCH 027/320] [SPARK-6908] [SQL] Use isolated Hive client This PR switches Spark SQL's Hive support to use the isolated hive client interface introduced by #5851, instead of directly interacting with the client. 
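A rough, hypothetical sketch of the classloader-isolation idea behind that interface (this is not Spark's actual IsolatedClientLoader; the jar path and class name below are made up, and a real implementation would also need parent-last delegation for the shared packages) might look like:

    import java.net.{URL, URLClassLoader}

    object IsolationSketch {
      // Instantiate a class from its own set of jars so that its dependencies do not
      // clash with the versions already on the application classpath.
      def loadIsolated(jars: Seq[URL], className: String): AnyRef = {
        val loader = new URLClassLoader(jars.toArray, getClass.getClassLoader)
        loader.loadClass(className).getConstructor().newInstance().asInstanceOf[AnyRef]
      }

      def main(args: Array[String]): Unit = {
        // Illustrative location only; substitute real client jars to try this out.
        val jars = Seq(new URL("file:///tmp/hive-client/lib/example-client.jar"))
        println(s"Would load an isolated client from: ${jars.mkString(", ")}")
        // loadIsolated(jars, "com.example.Client")  // hypothetical class name
      }
    }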
By using this isolated client we can now allow users to dynamically configure the version of Hive that they are connecting to by setting `spark.sql.hive.metastore.version` without the need to recompile. This also greatly reduces the surface area for our interaction with the hive libraries, hopefully making it easier to support other versions in the future. Jars for the desired hive version can be configured using `spark.sql.hive.metastore.jars`, which accepts the following options: - a colon-separated list of jar files or directories for hive and hadoop. - `builtin` - attempt to discover the jars that were used to load Spark SQL and use those. This option is only valid when using the execution version of Hive. - `maven` - download the correct version of hive on demand from maven. By default, `builtin` is used for Hive 13. This PR also removes the test step for building against Hive 12, as this will no longer be required to talk to Hive 12 metastores. However, the full removal of the Shim is deferred until a later PR. Remaining TODOs: - Remove the Hive Shims and inline code for Hive 13. - Several HiveCompatibility tests are not yet passing. - `nullformatCTAS` - As detailed below, we now are handling CTAS parsing ourselves instead of hacking into the Hive semantic analyzer. However, we currently only handle the common cases and not things like CTAS where the null format is specified. - `combine1` now leaks state about compression somehow, breaking all subsequent tests. As such we currently add it to the blacklist - `part_inherit_tbl_props` and `part_inherit_tbl_props_with_star` do not work anymore. We are correctly propagating the information - "load_dyn_part14.*" - These tests pass when run on their own, but fail when run with all other tests. It seems our `RESET` mechanism may not be as robust as it used to be? Other required changes: - `CreateTableAsSelect` no longer carries parts of the HiveQL AST with it through the query execution pipeline. Instead, we parse CTAS during the HiveQL conversion and construct a `HiveTable`. The full parsing here is not yet complete as detailed above in the remaining TODOs. Since the operator is Hive specific, it is moved to the hive package. - `Command` is simplified to be a trait that simply acts as a marker for a LogicalPlan that should be eagerly evaluated. Author: Michael Armbrust <michael@databricks.com> Closes #5876 from marmbrus/useIsolatedClient and squashes the following commits: 258d000 [Michael Armbrust] really really correct path handling e56fd4a [Michael Armbrust] getAbsolutePath 5a259f5 [Michael Armbrust] fix typos 81bb366 [Michael Armbrust] comments from vanzin 5f3945e [Michael Armbrust] Merge remote-tracking branch 'origin/master' into useIsolatedClient 4b5cd41 [Michael Armbrust] yin's comments f5de7de [Michael Armbrust] cleanup 11e9c72 [Michael Armbrust] better coverage in versions suite 7e8f010 [Michael Armbrust] better error messages and jar handling e7b3941 [Michael Armbrust] more permisive checking for function registration da91ba7 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into useIsolatedClient 5fe5894 [Michael Armbrust] fix serialization suite 81711c4 [Michael Armbrust] Initial support for running without maven 1d8ae44 [Michael Armbrust] fix final tests?
1c50813 [Michael Armbrust] more comments a3bee70 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into useIsolatedClient a6f5df1 [Michael Armbrust] style ab07f7e [Michael Armbrust] WIP 4d8bf02 [Michael Armbrust] Remove hive 12 compilation 8843a25 [Michael Armbrust] [SPARK-6908] [SQL] Use isolated Hive client --- dev/run-tests | 23 - project/MimaExcludes.scala | 2 + project/SparkBuild.scala | 9 +- .../plans/logical/basicOperators.scala | 16 +- .../sql/catalyst/plans/logical/commands.scala | 8 +- .../spark/sql/catalyst/SqlParserSuite.scala | 6 +- .../org/apache/spark/sql/DataFrame.scala | 1 - .../org/apache/spark/sql/SQLContext.scala | 11 +- .../apache/spark/sql/execution/commands.scala | 4 +- .../org/apache/spark/sql/sources/ddl.scala | 16 +- .../hive/thriftserver/SparkSQLCLIDriver.scala | 26 +- .../sql/hive/thriftserver/SparkSQLEnv.scala | 9 +- .../execution/HiveCompatibilitySuite.scala | 12 +- .../apache/spark/sql/hive/HiveContext.scala | 283 ++++++------ .../spark/sql/hive/HiveMetastoreCatalog.scala | 415 ++++++------------ .../org/apache/spark/sql/hive/HiveQl.scala | 126 +++++- .../apache/spark/sql/hive/TableReader.scala | 11 +- .../sql/hive/client/ClientInterface.scala | 41 +- .../spark/sql/hive/client/ClientWrapper.scala | 99 +++-- .../hive/client/IsolatedClientLoader.scala | 23 +- .../sql/hive/client/ReflectionMagic.scala | 8 + .../hive/execution/CreateTableAsSelect.scala | 33 +- .../hive/execution/InsertIntoHiveTable.scala | 33 +- .../spark/sql/hive/execution/commands.scala | 13 + .../apache/spark/sql/hive/test/TestHive.scala | 72 ++- sql/hive/src/test/resources/log4j.properties | 2 +- .../spark/sql/hive/ErrorPositionSuite.scala | 22 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 26 +- .../spark/sql/hive/SerializationSuite.scala | 6 +- .../spark/sql/hive/client/VersionsSuite.scala | 78 +++- .../hive/execution/HiveComparisonTest.scala | 2 + .../sql/hive/execution/HiveQuerySuite.scala | 2 +- .../sql/hive/execution/PruningSuite.scala | 15 +- 33 files changed, 782 insertions(+), 671 deletions(-) diff --git a/dev/run-tests b/dev/run-tests index 05c63bce4d40d..ef587a1a5988c 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -142,29 +142,6 @@ CURRENT_BLOCK=$BLOCK_BUILD { HIVE_BUILD_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver" - HIVE_12_BUILD_ARGS="$HIVE_BUILD_ARGS -Phive-0.12.0" - - # First build with Hive 0.12.0 to ensure patches do not break the Hive 0.12.0 build - echo "[info] Compile with Hive 0.12.0" - [ -d "lib_managed" ] && rm -rf lib_managed - echo "[info] Building Spark with these arguments: $HIVE_12_BUILD_ARGS" - - if [ "${AMPLAB_JENKINS_BUILD_TOOL}" == "maven" ]; then - build/mvn $HIVE_12_BUILD_ARGS clean package -DskipTests - else - # NOTE: echo "q" is needed because sbt on encountering a build file with failure - # (either resolution or compilation) prompts the user for input either q, r, etc - # to quit or retry. This echo is there to make it not block. - # NOTE: Do not quote $BUILD_MVN_PROFILE_ARGS or else it will be interpreted as a - # single argument! - # QUESTION: Why doesn't 'yes "q"' work? - # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work? 
- echo -e "q\n" \ - | build/sbt $HIVE_12_BUILD_ARGS clean hive/compile hive-thriftserver/compile \ - | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including" - fi - - # Then build with default Hive version (0.13.1) because tests are based on this version echo "[info] Compile with Hive 0.13.1" [ -d "lib_managed" ] && rm -rf lib_managed echo "[info] Building Spark with these arguments: $HIVE_BUILD_ARGS" diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index bf343d4b7e40b..cfe387faec14b 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -89,6 +89,8 @@ object MimaExcludes { ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.Vector.numActives") ) ++ Seq( + // Execution should never be included as its always internal. + MimaBuild.excludeSparkPackage("sql.execution"), // This `protected[sql]` method was removed in 1.3.1 ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.sql.SQLContext.checkAnalysis"), diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b4431c7ee05b6..026855f8f6a5a 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -193,6 +193,7 @@ object SparkBuild extends PomBuild { * Usage: `build/sbt sparkShell` */ val sparkShell = taskKey[Unit]("start a spark-shell.") + val sparkSql = taskKey[Unit]("starts the spark sql CLI.") enable(Seq( connectInput in run := true, @@ -203,6 +204,12 @@ object SparkBuild extends PomBuild { sparkShell := { (runMain in Compile).toTask(" org.apache.spark.repl.Main -usejavacp").value + }, + + javaOptions in Compile += "-Dspark.master=local", + + sparkSql := { + (runMain in Compile).toTask(" org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver").value } ))(assembly) @@ -497,7 +504,7 @@ object TestSettings { // Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes // launched by the tests have access to the correct test-time classpath. envVars in Test ++= Map( - "SPARK_DIST_CLASSPATH" -> + "SPARK_DIST_CLASSPATH" -> (fullClasspath in Test).value.files.map(_.getAbsolutePath).mkString(":").stripSuffix(":"), "JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))), javaOptions in Test += "-Dspark.test.home=" + sparkHome, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala index ba0abb2df596c..0f349f9d11415 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala @@ -149,16 +149,6 @@ case class InsertIntoTable( } } -case class CreateTableAsSelect[T]( - databaseName: Option[String], - tableName: String, - child: LogicalPlan, - allowExisting: Boolean, - desc: Option[T] = None) extends UnaryNode { - override def output: Seq[Attribute] = Seq.empty[Attribute] - override lazy val resolved: Boolean = databaseName != None && childrenResolved -} - /** * A container for holding named common table expressions (CTEs) and a query plan. * This operator will be removed during analysis and the relations will be substituted into child. 
@@ -184,10 +174,10 @@ case class WriteToFile( } /** - * @param order The ordering expressions - * @param global True means global sorting apply for entire data set, + * @param order The ordering expressions + * @param global True means global sorting apply for entire data set, * False means sorting only apply within the partition. - * @param child Child logical plan + * @param child Child logical plan */ case class Sort( order: Seq[SortOrder], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala index 45905f8ef98c5..246f4d7e34d3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala @@ -21,9 +21,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute /** * A logical node that represents a non-query command to be executed by the system. For example, - * commands can be used by parsers to represent DDL operations. + * commands can be used by parsers to represent DDL operations. Commands, unlike queries, are + * eagerly executed. */ -abstract class Command extends LeafNode { - self: Product => - def output: Seq[Attribute] = Seq.empty -} +trait Command diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala index a652c70560990..890ea2a84b82e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala @@ -17,11 +17,15 @@ package org.apache.spark.sql.catalyst +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.logical.Command import org.scalatest.FunSuite -private[sql] case class TestCommand(cmd: String) extends Command +private[sql] case class TestCommand(cmd: String) extends LogicalPlan with Command { + override def output: Seq[Attribute] = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty +} private[sql] class SuperLongKeywordTestParser extends AbstractSparkSQLParser { protected val EXECUTE = Keyword("THISISASUPERLONGKEYWORDTEST") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 79fbf50300d4d..7947042c14299 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -143,7 +143,6 @@ class DataFrame private[sql]( // happen right away to let these side effects take place eagerly. 
case _: Command | _: InsertIntoTable | - _: CreateTableAsSelect[_] | _: CreateTableUsingAsSelect | _: WriteToFile => LogicalRDD(queryExecution.analyzed.output, queryExecution.toRdd)(sqlContext) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 0563430a6fdc7..0ac0936f0f592 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -70,7 +70,7 @@ import org.apache.spark.{Partition, SparkContext} * spark-sql> SELECT * FROM src LIMIT 1; * *-- Exception will be thrown and switch to dialect - *-- "sql" (for SQLContext) or + *-- "sql" (for SQLContext) or *-- "hiveql" (for HiveContext) * }}} */ @@ -107,7 +107,7 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * @return Spark SQL configuration */ - protected[sql] def conf = tlSession.get().conf + protected[sql] def conf = currentSession().conf /** * Set Spark SQL configuration properties. @@ -1197,13 +1197,17 @@ class SQLContext(@transient val sparkContext: SparkContext) |${stringOrError(executedPlan)} """.stripMargin.trim - override def toString: String = + override def toString: String = { + def output = + analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}").mkString(", ") + // TODO previously will output RDD details by run (${stringOrError(toRdd.toDebugString)}) // however, the `toRdd` will cause the real execution, which is not what we want. // We need to think about how to avoid the side effect. s"""== Parsed Logical Plan == |${stringOrError(logical)} |== Analyzed Logical Plan == + |${stringOrError(output)} |${stringOrError(analyzed)} |== Optimized Logical Plan == |${stringOrError(optimizedPlan)} @@ -1212,6 +1216,7 @@ class SQLContext(@transient val sparkContext: SparkContext) |Code Generation: ${stringOrError(executedPlan.codegenEnabled)} |== RDD == """.stripMargin.trim + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 65687db4e6235..388a8184e4cfe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -32,9 +32,11 @@ import org.apache.spark.sql.{DataFrame, SQLConf, SQLContext} * A logical command that is executed for its side-effects. `RunnableCommand`s are * wrapped in `ExecutedCommand` during execution. 
*/ -trait RunnableCommand extends logical.Command { +private[sql] trait RunnableCommand extends LogicalPlan with logical.Command { self: Product => + override def output: Seq[Attribute] = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty def run(sqlContext: SQLContext): Seq[Row] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index 1abf3aa51cb25..06c64f2bdd59e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -269,8 +269,10 @@ private[sql] case class ResolvedDataSource(provider: Class[_], relation: BaseRel */ private[sql] case class DescribeCommand( table: LogicalPlan, - isExtended: Boolean) extends Command { - override val output = Seq( + isExtended: Boolean) extends LogicalPlan with Command { + + override def children: Seq[LogicalPlan] = Seq.empty + override val output: Seq[Attribute] = Seq( // Column names are based on Hive. AttributeReference("col_name", StringType, nullable = false, new MetadataBuilder().putString("comment", "name of the column").build())(), @@ -292,7 +294,11 @@ private[sql] case class CreateTableUsing( temporary: Boolean, options: Map[String, String], allowExisting: Boolean, - managedIfNoPath: Boolean) extends Command + managedIfNoPath: Boolean) extends LogicalPlan with Command { + + override def output: Seq[Attribute] = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty +} /** * A node used to support CTAS statements and saveAsTable for the data source API. @@ -318,7 +324,7 @@ private[sql] case class CreateTempTableUsing( provider: String, options: Map[String, String]) extends RunnableCommand { - def run(sqlContext: SQLContext): Seq[Row] = { + override def run(sqlContext: SQLContext): Seq[Row] = { val resolved = ResolvedDataSource(sqlContext, userSpecifiedSchema, provider, options) sqlContext.registerDataFrameAsTable( DataFrame(sqlContext, LogicalRelation(resolved.relation)), tableName) @@ -333,7 +339,7 @@ private[sql] case class CreateTempTableUsingAsSelect( options: Map[String, String], query: LogicalPlan) extends RunnableCommand { - def run(sqlContext: SQLContext): Seq[Row] = { + override def run(sqlContext: SQLContext): Seq[Row] = { val df = DataFrame(sqlContext, query) val resolved = ResolvedDataSource(sqlContext, provider, mode, options, df) sqlContext.registerDataFrameAsTable( diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index b7b6925aa87f7..deb1008c468bf 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -37,7 +37,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.thrift.transport.TSocket import org.apache.spark.Logging -import org.apache.spark.sql.hive.HiveShim +import org.apache.spark.sql.hive.{HiveContext, HiveShim} import org.apache.spark.util.Utils private[hive] object SparkSQLCLIDriver { @@ -74,7 +74,12 @@ private[hive] object SparkSQLCLIDriver { System.exit(1) } - val sessionState = new CliSessionState(new HiveConf(classOf[SessionState])) + val cliConf = new HiveConf(classOf[SessionState]) + // Override the location of the metastore since this is only used for local 
execution. + HiveContext.newTemporaryConfiguration().foreach { + case (key, value) => cliConf.set(key, value) + } + val sessionState = new CliSessionState(cliConf) sessionState.in = System.in try { @@ -91,10 +96,14 @@ private[hive] object SparkSQLCLIDriver { // Set all properties specified via command line. val conf: HiveConf = sessionState.getConf - sessionState.cmdProperties.entrySet().foreach { item: java.util.Map.Entry[Object, Object] => - conf.set(item.getKey.asInstanceOf[String], item.getValue.asInstanceOf[String]) - sessionState.getOverriddenConfigurations.put( - item.getKey.asInstanceOf[String], item.getValue.asInstanceOf[String]) + sessionState.cmdProperties.entrySet().foreach { item => + val key = item.getKey.asInstanceOf[String] + val value = item.getValue.asInstanceOf[String] + // We do not propagate metastore options to the execution copy of hive. + if (key != "javax.jdo.option.ConnectionURL") { + conf.set(key, value) + sessionState.getOverriddenConfigurations.put(key, value) + } } SessionState.start(sessionState) @@ -138,8 +147,9 @@ private[hive] object SparkSQLCLIDriver { case e: UnsupportedEncodingException => System.exit(3) } - // use the specified database if specified - cli.processSelectDatabase(sessionState); + if (sessionState.database != null) { + SparkSQLEnv.hiveContext.runSqlHive(s"USE ${sessionState.database}") + } // Execute -i init files (always in silent mode) cli.processInitFiles(sessionState) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala index 97b46a01ba5b4..7c0c505e2d61e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.thriftserver +import java.io.PrintStream + import scala.collection.JavaConversions._ import org.apache.spark.scheduler.StatsReportListener @@ -39,7 +41,6 @@ private[hive] object SparkSQLEnv extends Logging { sparkConf .setAppName(s"SparkSQL::${Utils.localHostName()}") - .set("spark.sql.hive.version", HiveShim.version) .set( "spark.serializer", maybeSerializer.getOrElse("org.apache.spark.serializer.KryoSerializer")) @@ -51,6 +52,12 @@ private[hive] object SparkSQLEnv extends Logging { sparkContext.addSparkListener(new StatsReportListener()) hiveContext = new HiveContext(sparkContext) + hiveContext.metadataHive.setOut(new PrintStream(System.out, true, "UTF-8")) + hiveContext.metadataHive.setInfo(new PrintStream(System.err, true, "UTF-8")) + hiveContext.metadataHive.setError(new PrintStream(System.err, true, "UTF-8")) + + hiveContext.setConf("spark.sql.hive.version", HiveShim.version) + if (log.isDebugEnabled) { hiveContext.hiveconf.getAllProperties.toSeq.sorted.foreach { case (k, v) => logDebug(s"HiveConf var: $k=$v") diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 5e411c2fdba9d..b6245a57074c8 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -240,7 +240,17 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { 
// It has a bug and it has been fixed by // https://issues.apache.org/jira/browse/HIVE-7673 (in Hive 0.14 and trunk). - "input46" + "input46", + + // These tests were broken by the hive client isolation PR. + "part_inherit_tbl_props", + "part_inherit_tbl_props_with_star", + + "nullformatCTAS", // SPARK-7411: need to finish CTAS parser + + // The isolated classloader seemed to make some of our test reset mechanisms less robust. + "combine1", // This test changes compression settings in a way that breaks all subsequent tests. + "load_dyn_part14.*" // These work alone but fail when run with other tests... ) ++ HiveShim.compatibilityBlackList /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index f25723e53f07c..538c6c7f0a200 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -17,8 +17,9 @@ package org.apache.spark.sql.hive -import java.io.{BufferedReader, InputStreamReader, PrintStream} +import java.io.{BufferedReader, File, InputStreamReader, PrintStream} import java.sql.Timestamp +import java.util.{ArrayList => JArrayList} import org.apache.hadoop.hive.ql.parse.VariableSubstitution import org.apache.spark.sql.catalyst.Dialect @@ -35,15 +36,19 @@ import org.apache.hadoop.hive.ql.processors._ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable} -import org.apache.spark.SparkContext +import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.annotation.Experimental +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateSubQueries, OverrideCatalog, OverrideFunctionRegistry} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.{ExecutedCommand, ExtractPythonUdfs, QueryExecutionException, SetCommand} +import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.hive.execution.{DescribeHiveTableCommand, HiveNativeCommand} import org.apache.spark.sql.sources.{DDLParser, DataSourceStrategy} import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + /** * This is the HiveQL Dialect, this dialect is strongly bind with HiveContext @@ -61,6 +66,8 @@ private[hive] class HiveQLDialect extends Dialect { class HiveContext(sc: SparkContext) extends SQLContext(sc) { self => + import HiveContext._ + /** * When true, enables an experimental feature where metastore tables that use the parquet SerDe * are automatically converted to use the Spark SQL parquet table scan, instead of the Hive @@ -93,9 +100,118 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { protected[sql] def convertCTAS: Boolean = getConf("spark.sql.hive.convertCTAS", "false").toBoolean + /** + * The version of the hive client that will be used to communicate with the metastore. Note that + * this does not necessarily need to be the same version of Hive that is used internally by + * Spark SQL for execution. + */ + protected[hive] def hiveMetastoreVersion: String = + getConf(HIVE_METASTORE_VERSION, hiveExecutionVersion) + + /** + * The location of the jars that should be used to instantiate the HiveMetastoreClient. This + * property can be one of three options: + * - a classpath in the standard format for both hive and hadoop. 
+ * - builtin - attempt to discover the jars that were used to load Spark SQL and use those. This + * option is only valid when using the execution version of Hive. + * - maven - download the correct version of hive on demand from maven. + */ + protected[hive] def hiveMetastoreJars: String = + getConf(HIVE_METASTORE_JARS, "builtin") + @transient protected[sql] lazy val substitutor = new VariableSubstitution() + /** + * The copy of the hive client that is used for execution. Currently this must always be + * Hive 13 as this is the version of Hive that is packaged with Spark SQL. This copy of the + * client is used for execution related tasks like registering temporary functions or ensuring + * that the ThreadLocal SessionState is correctly populated. This copy of Hive is *not* used + * for storing peristent metadata, and only point to a dummy metastore in a temporary directory. + */ + @transient + protected[hive] lazy val executionHive: ClientWrapper = { + logInfo(s"Initilizing execution hive, version $hiveExecutionVersion") + new ClientWrapper( + version = IsolatedClientLoader.hiveVersion(hiveExecutionVersion), + config = newTemporaryConfiguration()) + } + SessionState.setCurrentSessionState(executionHive.state) + + /** + * The copy of the Hive client that is used to retrieve metadata from the Hive MetaStore. + * The version of the Hive client that is used here must match the metastore that is configured + * in the hive-site.xml file. + */ + @transient + protected[hive] lazy val metadataHive: ClientInterface = { + val metaVersion = IsolatedClientLoader.hiveVersion(hiveMetastoreVersion) + + // We instantiate a HiveConf here to read in the hive-site.xml file and then pass the options + // into the isolated client loader + val metadataConf = new HiveConf() + // `configure` goes second to override other settings. + val allConfig = metadataConf.iterator.map(e => e.getKey -> e.getValue).toMap ++ configure + + val isolatedLoader = if (hiveMetastoreJars == "builtin") { + if (hiveExecutionVersion != hiveMetastoreVersion) { + throw new IllegalArgumentException( + "Builtin jars can only be used when hive execution version == hive metastore version. " + + s"Execution: ${hiveExecutionVersion} != Metastore: ${hiveMetastoreVersion}. " + + "Specify a vaild path to the correct hive jars using $HIVE_METASTORE_JARS " + + s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.") + } + val jars = getClass.getClassLoader match { + case urlClassLoader: java.net.URLClassLoader => urlClassLoader.getURLs + case other => + throw new IllegalArgumentException( + "Unable to locate hive jars to connect to metastore " + + s"using classloader ${other.getClass.getName}. " + + "Please set spark.sql.hive.metastore.jars") + } + + logInfo( + s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using Spark classes.") + new IsolatedClientLoader( + version = metaVersion, + execJars = jars.toSeq, + config = allConfig, + isolationOn = true) + } else if (hiveMetastoreJars == "maven") { + // TODO: Support for loading the jars from an already downloaded location. + logInfo( + s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using maven.") + IsolatedClientLoader.forVersion(hiveMetastoreVersion, allConfig ) + } else { + // Convert to files and expand any directories. 
+ val jars = + hiveMetastoreJars + .split(File.pathSeparator) + .flatMap { + case path if new File(path).getName() == "*" => + val files = new File(path).getParentFile().listFiles() + if (files == null) { + logWarning(s"Hive jar path '$path' does not exist.") + Nil + } else { + files.filter(_.getName().toLowerCase().endsWith(".jar")) + } + case path => + new File(path) :: Nil + } + .map(_.toURI.toURL) + + logInfo( + s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using $jars") + new IsolatedClientLoader( + version = metaVersion, + execJars = jars.toSeq, + config = allConfig, + isolationOn = true) + } + isolatedLoader.client + } + protected[sql] override def parseSql(sql: String): LogicalPlan = { super.parseSql(substitutor.substitute(hiveconf, sql)) } @@ -178,15 +294,10 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { // recorded in the Hive metastore. // This logic is based on org.apache.hadoop.hive.ql.exec.StatsTask.aggregateStats(). if (newTotalSize > 0 && newTotalSize != oldTotalSize) { - tableParameters.put(HiveShim.getStatsSetupConstTotalSize, newTotalSize.toString) - val hiveTTable = relation.hiveQlTable.getTTable - hiveTTable.setParameters(tableParameters) - val tableFullName = - relation.hiveQlTable.getDbName + "." + relation.hiveQlTable.getTableName - - catalog.synchronized { - catalog.client.alterTable(tableFullName, new Table(hiveTTable)) - } + catalog.client.alterTable( + relation.table.copy( + properties = relation.table.properties + + (HiveShim.getStatsSetupConstTotalSize -> newTotalSize.toString))) } case otherRelation => throw new UnsupportedOperationException( @@ -194,47 +305,19 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } } - // Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur. - @transient - protected lazy val outputBuffer = new java.io.OutputStream { - var pos: Int = 0 - var buffer = new Array[Int](10240) - def write(i: Int): Unit = { - buffer(pos) = i - pos = (pos + 1) % buffer.size - } - - override def toString: String = { - val (end, start) = buffer.splitAt(pos) - val input = new java.io.InputStream { - val iterator = (start ++ end).iterator - - def read(): Int = if (iterator.hasNext) iterator.next() else -1 - } - val reader = new BufferedReader(new InputStreamReader(input)) - val stringBuilder = new StringBuilder - var line = reader.readLine() - while(line != null) { - stringBuilder.append(line) - stringBuilder.append("\n") - line = reader.readLine() - } - stringBuilder.toString() - } - } - - protected[hive] def sessionState = tlSession.get().asInstanceOf[this.SQLSession].sessionState - protected[hive] def hiveconf = tlSession.get().asInstanceOf[this.SQLSession].hiveconf override def setConf(key: String, value: String): Unit = { super.setConf(key, value) - runSqlHive(s"SET $key=$value") + hiveconf.set(key, value) + executionHive.runSqlHive(s"SET $key=$value") + metadataHive.runSqlHive(s"SET $key=$value") } /* A catalyst metadata catalog that points to the Hive Metastore. */ @transient - override protected[sql] lazy val catalog = new HiveMetastoreCatalog(this) with OverrideCatalog + override protected[sql] lazy val catalog = + new HiveMetastoreCatalog(metadataHive, this) with OverrideCatalog // Note that HiveUDFs will be overridden by functions registered in this context. 
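
For illustration only (not part of this patch): with the two settings documented above, an application that needs to talk to an older metastore would typically be configured along the following lines. This is a minimal sketch that assumes spark.sql.* entries in SparkConf are propagated into the SQL conf before the lazy metadataHive is first touched; the app name, master, and version value are made up.

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.hive.HiveContext

    val conf = new SparkConf()
      .setAppName("metastore-version-demo")               // illustrative name
      .setMaster("local[2]")
      .set("spark.sql.hive.metastore.version", "0.12.0")  // HIVE_METASTORE_VERSION
      .set("spark.sql.hive.metastore.jars", "maven")      // resolve matching Hive jars from maven
    val hiveContext = new HiveContext(new SparkContext(conf))
    hiveContext.sql("SHOW TABLES").collect()               // first metastore access builds the isolated client
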
@transient @@ -261,16 +344,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { new this.SQLSession() } + /** Overridden by child classes that need to set configuration before the client init. */ + protected def configure(): Map[String, String] = Map.empty + protected[hive] class SQLSession extends super.SQLSession { protected[sql] override lazy val conf: SQLConf = new SQLConf { override def dialect: String = getConf(SQLConf.DIALECT, "hiveql") } - protected[hive] lazy val hiveconf: HiveConf = { - setConf(sessionState.getConf.getAllProperties) - sessionState.getConf - } - /** * SQLConf and HiveConf contracts: * @@ -285,78 +366,12 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { state = new SessionState(new HiveConf(classOf[SessionState])) SessionState.start(state) } - if (state.out == null) { - state.out = new PrintStream(outputBuffer, true, "UTF-8") - } - if (state.err == null) { - state.err = new PrintStream(outputBuffer, true, "UTF-8") - } state } - } - - /** - * Runs the specified SQL query using Hive. - */ - protected[sql] def runSqlHive(sql: String): Seq[String] = { - val maxResults = 100000 - val results = runHive(sql, maxResults) - // It is very confusing when you only get back some of the results... - if (results.size == maxResults) sys.error("RESULTS POSSIBLY TRUNCATED") - results - } - - /** - * Execute the command using Hive and return the results as a sequence. Each element - * in the sequence is one row. - */ - protected def runHive(cmd: String, maxRows: Int = 1000): Seq[String] = synchronized { - try { - val cmd_trimmed: String = cmd.trim() - val tokens: Array[String] = cmd_trimmed.split("\\s+") - val cmd_1: String = cmd_trimmed.substring(tokens(0).length()).trim() - val proc: CommandProcessor = HiveShim.getCommandProcessor(Array(tokens(0)), hiveconf) - - // Makes sure the session represented by the `sessionState` field is activated. This implies - // Spark SQL Hive support uses a single `SessionState` for all Hive operations and breaks - // session isolation under multi-user scenarios (i.e. HiveThriftServer2). - // TODO Fix session isolation - if (SessionState.get() != sessionState) { - SessionState.start(sessionState) - } - proc match { - case driver: Driver => - val results = HiveShim.createDriverResultsArray - val response: CommandProcessorResponse = driver.run(cmd) - // Throw an exception if there is an error in query processing. 
- if (response.getResponseCode != 0) { - driver.close() - throw new QueryExecutionException(response.getErrorMessage) - } - driver.setMaxRows(maxRows) - driver.getResults(results) - driver.close() - HiveShim.processResults(results) - case _ => - if (sessionState.out != null) { - sessionState.out.println(tokens(0) + " " + cmd_1) - } - Seq(proc.run(cmd_1).getResponseCode.toString) - } - } catch { - case e: Exception => - logError( - s""" - |====================== - |HIVE FAILURE OUTPUT - |====================== - |${outputBuffer.toString} - |====================== - |END HIVE FAILURE OUTPUT - |====================== - """.stripMargin) - throw e + protected[hive] lazy val hiveconf: HiveConf = { + setConf(sessionState.getConf.getAllProperties) + sessionState.getConf } } @@ -391,17 +406,23 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { ) } + protected[hive] def runSqlHive(sql: String): Seq[String] = { + if (sql.toLowerCase.contains("create temporary function")) { + executionHive.runSqlHive(sql) + } else if (sql.trim.toLowerCase.startsWith("set")) { + metadataHive.runSqlHive(sql) + executionHive.runSqlHive(sql) + } else { + metadataHive.runSqlHive(sql) + } + } + @transient override protected[sql] val planner = hivePlanner /** Extends QueryExecution with hive specific features. */ protected[sql] class QueryExecution(logicalPlan: LogicalPlan) extends super.QueryExecution(logicalPlan) { - // Like what we do in runHive, makes sure the session represented by the - // `sessionState` field is activated. - if (SessionState.get() != sessionState) { - SessionState.start(sessionState) - } /** * Returns the result as a hive compatible sequence of strings. For native commands, the @@ -439,7 +460,21 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } -private object HiveContext { +private[hive] object HiveContext { + /** The version of hive used internally by Spark SQL. */ + val hiveExecutionVersion: String = "0.13.1" + + val HIVE_METASTORE_VERSION: String = "spark.sql.hive.metastore.version" + val HIVE_METASTORE_JARS: String = "spark.sql.hive.metastore.jars" + + /** Constructs a configuration for hive, where the metastore is located in a temp directory. 
*/ + def newTemporaryConfiguration(): Map[String, String] = { + val tempDir = Utils.createTempDir() + val localMetastore = new File(tempDir, "metastore").getAbsolutePath + Map( + "javax.jdo.option.ConnectionURL" -> s"jdbc:derby:;databaseName=$localMetastore;create=true") + } + protected val primitiveTypes = Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType, ShortType, DateType, TimestampType, BinaryType) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 4d222cf88e5e8..8fcdf3d0ab119 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -22,6 +22,8 @@ import java.util.{List => JList} import com.google.common.base.Objects import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} + +import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition => TPartition, Table => TTable} import org.apache.hadoop.hive.metastore.{TableType, Warehouse} import org.apache.hadoop.hive.ql.metadata._ @@ -32,6 +34,7 @@ import org.apache.hadoop.hive.serde2.{Deserializer, SerDeException} import org.apache.hadoop.util.ReflectionUtils import org.apache.spark.Logging +import org.apache.spark.sql.hive.client.IsolatedClientLoader import org.apache.spark.sql.{SaveMode, AnalysisException, SQLContext} import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NoSuchTableException, Catalog, OverrideCatalog} import org.apache.spark.sql.catalyst.expressions._ @@ -39,6 +42,7 @@ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.parquet.{ParquetRelation2, Partition => ParquetPartition, PartitionSpec} import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, DDLParser, LogicalRelation, ResolvedDataSource} import org.apache.spark.sql.types._ @@ -47,11 +51,10 @@ import org.apache.spark.util.Utils /* Implicit conversions */ import scala.collection.JavaConversions._ -private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging { - import org.apache.spark.sql.hive.HiveMetastoreTypes._ +private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: HiveContext) + extends Catalog with Logging { - /** Connection to hive metastore. Usages should lock on `this`. */ - protected[hive] val client = Hive.get(hive.hiveconf) + import org.apache.spark.sql.hive.HiveMetastoreTypes._ /** Usages should lock on `this`. 
*/ protected[hive] lazy val hiveWarehouse = new Warehouse(hive.hiveconf) @@ -67,14 +70,12 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with val cacheLoader = new CacheLoader[QualifiedTableName, LogicalPlan]() { override def load(in: QualifiedTableName): LogicalPlan = { logDebug(s"Creating new cached data source for $in") - val table = HiveMetastoreCatalog.this.synchronized { - client.getTable(in.database, in.name) - } + val table = client.getTable(in.database, in.name) def schemaStringFromParts: Option[String] = { - Option(table.getProperty("spark.sql.sources.schema.numParts")).map { numParts => + table.properties.get("spark.sql.sources.schema.numParts").map { numParts => val parts = (0 until numParts.toInt).map { index => - val part = table.getProperty(s"spark.sql.sources.schema.part.${index}") + val part = table.properties.get(s"spark.sql.sources.schema.part.${index}").orNull if (part == null) { throw new AnalysisException( s"Could not read schema from the metastore because it is corrupted " + @@ -92,20 +93,20 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with // After SPARK-6024, we removed this flag. // Although we are not using spark.sql.sources.schema any more, we need to still support. val schemaString = - Option(table.getProperty("spark.sql.sources.schema")).orElse(schemaStringFromParts) + table.properties.get("spark.sql.sources.schema").orElse(schemaStringFromParts) val userSpecifiedSchema = schemaString.map(s => DataType.fromJson(s).asInstanceOf[StructType]) // It does not appear that the ql client for the metastore has a way to enumerate all the // SerDe properties directly... - val options = table.getTTable.getSd.getSerdeInfo.getParameters.toMap + val options = table.serdeProperties val resolvedRelation = ResolvedDataSource( hive, userSpecifiedSchema, - table.getProperty("spark.sql.sources.provider"), + table.properties("spark.sql.sources.provider"), options) LogicalRelation(resolvedRelation.relation) @@ -144,49 +145,53 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with options: Map[String, String], isExternal: Boolean): Unit = { val (dbName, tblName) = processDatabaseAndTableName("default", tableName) - val tbl = new Table(dbName, tblName) - - tbl.setProperty("spark.sql.sources.provider", provider) + val tableProperties = new scala.collection.mutable.HashMap[String, String] + tableProperties.put("spark.sql.sources.provider", provider) if (userSpecifiedSchema.isDefined) { val threshold = hive.conf.schemaStringLengthThreshold val schemaJsonString = userSpecifiedSchema.get.json // Split the JSON string. 
val parts = schemaJsonString.grouped(threshold).toSeq - tbl.setProperty("spark.sql.sources.schema.numParts", parts.size.toString) + tableProperties.put("spark.sql.sources.schema.numParts", parts.size.toString) parts.zipWithIndex.foreach { case (part, index) => - tbl.setProperty(s"spark.sql.sources.schema.part.${index}", part) + tableProperties.put(s"spark.sql.sources.schema.part.${index}", part) } } - options.foreach { case (key, value) => tbl.setSerdeParam(key, value) } - if (isExternal) { - tbl.setProperty("EXTERNAL", "TRUE") - tbl.setTableType(TableType.EXTERNAL_TABLE) + val tableType = if (isExternal) { + tableProperties.put("EXTERNAL", "TRUE") + ExternalTable } else { - tbl.setProperty("EXTERNAL", "FALSE") - tbl.setTableType(TableType.MANAGED_TABLE) - } - - // create the table - synchronized { - client.createTable(tbl, false) - } + tableProperties.put("EXTERNAL", "FALSE") + ManagedTable + } + + client.createTable( + HiveTable( + specifiedDatabase = Option(dbName), + name = tblName, + schema = Seq.empty, + partitionColumns = Seq.empty, + tableType = tableType, + properties = tableProperties.toMap, + serdeProperties = options)) } - def hiveDefaultTableFilePath(tableName: String): String = synchronized { - val currentDatabase = client.getDatabase(hive.sessionState.getCurrentDatabase) - - hiveWarehouse.getTablePath(currentDatabase, tableName).toString + def hiveDefaultTableFilePath(tableName: String): String = { + // Code based on: hiveWarehouse.getTablePath(currentDatabase, tableName) + new Path( + new Path(client.getDatabase(client.currentDatabase).location), + tableName.toLowerCase).toString } - def tableExists(tableIdentifier: Seq[String]): Boolean = synchronized { + def tableExists(tableIdentifier: Seq[String]): Boolean = { val tableIdent = processTableIdentifier(tableIdentifier) val databaseName = tableIdent .lift(tableIdent.size - 2) - .getOrElse(hive.sessionState.getCurrentDatabase) + .getOrElse(client.currentDatabase) val tblName = tableIdent.last - client.getTable(databaseName, tblName, false) != null + client.getTableOption(databaseName, tblName).isDefined } def lookupRelation( @@ -194,18 +199,11 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with alias: Option[String]): LogicalPlan = { val tableIdent = processTableIdentifier(tableIdentifier) val databaseName = tableIdent.lift(tableIdent.size - 2).getOrElse( - hive.sessionState.getCurrentDatabase) + client.currentDatabase) val tblName = tableIdent.last - val table = try { - synchronized { - client.getTable(databaseName, tblName) - } - } catch { - case te: org.apache.hadoop.hive.ql.metadata.InvalidTableException => - throw new NoSuchTableException - } + val table = client.getTable(databaseName, tblName) - if (table.getProperty("spark.sql.sources.provider") != null) { + if (table.properties.get("spark.sql.sources.provider").isDefined) { val dataSourceTable = cachedDataSourceTables(QualifiedTableName(databaseName, tblName).toLowerCase) // Then, if alias is specified, wrap the table with a Subquery using the alias. @@ -215,22 +213,16 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Subquery(tableIdent.last, dataSourceTable)) withAlias - } else if (table.isView) { - // if the unresolved relation is from hive view - // parse the text into logic node. 
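
The data source tables handled above persist their schema as JSON chunked across numbered table properties ("spark.sql.sources.schema.numParts" / "spark.sql.sources.schema.part.N") and reassemble it on load. A standalone sketch of that round trip, with the chunk size hard-coded for illustration (the real threshold comes from the schemaStringLengthThreshold conf):

    import org.apache.spark.sql.types.{DataType, StructType}

    def writeSchemaProps(schema: StructType, threshold: Int = 4000): Map[String, String] = {
      // Chunk the JSON form of the schema so no single metastore property grows too large.
      val parts = schema.json.grouped(threshold).toSeq
      Map("spark.sql.sources.schema.numParts" -> parts.size.toString) ++
        parts.zipWithIndex.map { case (part, i) => s"spark.sql.sources.schema.part.$i" -> part }
    }

    def readSchemaProps(props: Map[String, String]): Option[StructType] =
      props.get("spark.sql.sources.schema.numParts").map { numParts =>
        val json = (0 until numParts.toInt)
          .map(i => props(s"spark.sql.sources.schema.part.$i"))
          .mkString
        DataType.fromJson(json).asInstanceOf[StructType]
      }
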
- HiveQl.createPlanForView(table, alias) + } else if (table.tableType == VirtualView) { + val viewText = table.viewText.getOrElse(sys.error("Invalid view without text.")) + alias match { + // because hive use things like `_c0` to build the expanded text + // currently we cannot support view from "create view v1(c1) as ..." + case None => Subquery(table.name, HiveQl.createPlan(viewText)) + case Some(aliasText) => Subquery(aliasText, HiveQl.createPlan(viewText)) + } } else { - val partitions: Seq[Partition] = - if (table.isPartitioned) { - synchronized { - HiveShim.getAllPartitionsOf(client, table).toSeq - } - } else { - Nil - } - - MetastoreRelation(databaseName, tblName, alias)( - table.getTTable, partitions.map(part => part.getTPartition))(hive) + MetastoreRelation(databaseName, tblName, alias)(table)(hive) } } @@ -318,178 +310,10 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with result.newInstance() } - override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = synchronized { - val dbName = if (!caseSensitive) { - if (databaseName.isDefined) Some(databaseName.get.toLowerCase) else None - } else { - databaseName - } - val db = dbName.getOrElse(hive.sessionState.getCurrentDatabase) - - client.getAllTables(db).map(tableName => (tableName, false)) - } - - /** - * Create table with specified database, table name, table description and schema - * @param databaseName Database Name - * @param tableName Table Name - * @param schema Schema of the new table, if not specified, will use the schema - * specified in crtTbl - * @param allowExisting if true, ignore AlreadyExistsException - * @param desc CreateTableDesc object which contains the SerDe info. Currently - * we support most of the features except the bucket. - */ - def createTable( - databaseName: String, - tableName: String, - schema: Seq[Attribute], - allowExisting: Boolean = false, - desc: Option[CreateTableDesc] = None) { - val hconf = hive.hiveconf - - val (dbName, tblName) = processDatabaseAndTableName(databaseName, tableName) - val tbl = new Table(dbName, tblName) - - val crtTbl: CreateTableDesc = desc.getOrElse(null) - - // We should respect the passed in schema, unless it's not set - val hiveSchema: JList[FieldSchema] = if (schema == null || schema.isEmpty) { - crtTbl.getCols - } else { - schema.map(attr => new FieldSchema(attr.name, toMetastoreType(attr.dataType), null)) - } - tbl.setFields(hiveSchema) - - // Most of code are similar with the DDLTask.createTable() of Hive, - if (crtTbl != null && crtTbl.getTblProps() != null) { - tbl.getTTable().getParameters().putAll(crtTbl.getTblProps()) - } - - if (crtTbl != null && crtTbl.getPartCols() != null) { - tbl.setPartCols(crtTbl.getPartCols()) - } - - if (crtTbl != null && crtTbl.getStorageHandler() != null) { - tbl.setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, - crtTbl.getStorageHandler()) - } - - /* - * We use LazySimpleSerDe by default. - * - * If the user didn't specify a SerDe, and any of the columns are not simple - * types, we will have to use DynamicSerDe instead. 
- */ - if (crtTbl == null || crtTbl.getSerName() == null) { - val storageHandler = tbl.getStorageHandler() - if (storageHandler == null) { - logInfo(s"Default to LazySimpleSerDe for table $dbName.$tblName") - tbl.setSerializationLib(classOf[LazySimpleSerDe].getName()) - - import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - import org.apache.hadoop.io.Text - import org.apache.hadoop.mapred.TextInputFormat - - tbl.setInputFormatClass(classOf[TextInputFormat]) - tbl.setOutputFormatClass(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]]) - tbl.setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - } else { - val serDeClassName = storageHandler.getSerDeClass().getName() - logInfo(s"Use StorageHandler-supplied $serDeClassName for table $dbName.$tblName") - tbl.setSerializationLib(serDeClassName) - } - } else { - // let's validate that the serde exists - val serdeName = crtTbl.getSerName() - try { - val d = ReflectionUtils.newInstance(hconf.getClassByName(serdeName), hconf) - if (d != null) { - logDebug("Found class for $serdeName") - } - } catch { - case e: SerDeException => throw new HiveException("Cannot validate serde: " + serdeName, e) - } - tbl.setSerializationLib(serdeName) - } - - if (crtTbl != null && crtTbl.getFieldDelim() != null) { - tbl.setSerdeParam(serdeConstants.FIELD_DELIM, crtTbl.getFieldDelim()) - tbl.setSerdeParam(serdeConstants.SERIALIZATION_FORMAT, crtTbl.getFieldDelim()) - } - if (crtTbl != null && crtTbl.getFieldEscape() != null) { - tbl.setSerdeParam(serdeConstants.ESCAPE_CHAR, crtTbl.getFieldEscape()) - } - - if (crtTbl != null && crtTbl.getCollItemDelim() != null) { - tbl.setSerdeParam(serdeConstants.COLLECTION_DELIM, crtTbl.getCollItemDelim()) - } - if (crtTbl != null && crtTbl.getMapKeyDelim() != null) { - tbl.setSerdeParam(serdeConstants.MAPKEY_DELIM, crtTbl.getMapKeyDelim()) - } - if (crtTbl != null && crtTbl.getLineDelim() != null) { - tbl.setSerdeParam(serdeConstants.LINE_DELIM, crtTbl.getLineDelim()) - } - HiveShim.setTblNullFormat(crtTbl, tbl) - - if (crtTbl != null && crtTbl.getSerdeProps() != null) { - val iter = crtTbl.getSerdeProps().entrySet().iterator() - while (iter.hasNext()) { - val m = iter.next() - tbl.setSerdeParam(m.getKey(), m.getValue()) - } - } - - if (crtTbl != null && crtTbl.getComment() != null) { - tbl.setProperty("comment", crtTbl.getComment()) - } - - if (crtTbl != null && crtTbl.getLocation() != null) { - HiveShim.setLocation(tbl, crtTbl) - } - - if (crtTbl != null && crtTbl.getSkewedColNames() != null) { - tbl.setSkewedColNames(crtTbl.getSkewedColNames()) - } - if (crtTbl != null && crtTbl.getSkewedColValues() != null) { - tbl.setSkewedColValues(crtTbl.getSkewedColValues()) - } - - if (crtTbl != null) { - tbl.setStoredAsSubDirectories(crtTbl.isStoredAsSubDirectories()) - tbl.setInputFormatClass(crtTbl.getInputFormat()) - tbl.setOutputFormatClass(crtTbl.getOutputFormat()) - } - - tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName()) - tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName()) - - if (crtTbl != null && crtTbl.isExternal()) { - tbl.setProperty("EXTERNAL", "TRUE") - tbl.setTableType(TableType.EXTERNAL_TABLE) - } - - // set owner - try { - tbl.setOwner(hive.hiveconf.getUser) - } catch { - case e: IOException => throw new HiveException("Unable to get current user", e) - } - - // set create time - tbl.setCreateTime((System.currentTimeMillis() / 1000).asInstanceOf[Int]) - - // TODO add bucket support - // TODO set more info if Hive upgrade + override 
def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = { + val db = databaseName.getOrElse(client.currentDatabase) - // create the table - synchronized { - try client.createTable(tbl, allowExisting) catch { - case e: org.apache.hadoop.hive.metastore.api.AlreadyExistsException - if allowExisting => // Do nothing - case e: Throwable => throw e - } - } + client.listTables(db).map(tableName => (tableName, false)) } protected def processDatabaseAndTableName( @@ -598,42 +422,11 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with // Wait until children are resolved. case p: LogicalPlan if !p.childrenResolved => p - // TODO extra is in type of ASTNode which means the logical plan is not resolved - // Need to think about how to implement the CreateTableAsSelect.resolved - case CreateTableAsSelect(db, tableName, child, allowExisting, Some(extra: ASTNode)) => - val (dbName, tblName) = processDatabaseAndTableName(db, tableName) - val databaseName = dbName.getOrElse(hive.sessionState.getCurrentDatabase) - - // Get the CreateTableDesc from Hive SemanticAnalyzer - val desc: Option[CreateTableDesc] = if (tableExists(Seq(databaseName, tblName))) { - None - } else { - val sa = new SemanticAnalyzer(hive.hiveconf) { - override def analyzeInternal(ast: ASTNode) { - // A hack to intercept the SemanticAnalyzer.analyzeInternal, - // to ignore the SELECT clause of the CTAS - val method = classOf[SemanticAnalyzer].getDeclaredMethod( - "analyzeCreateTable", classOf[ASTNode], classOf[QB]) - method.setAccessible(true) - method.invoke(this, ast, this.getQB) - } - } - - sa.analyze(extra, new Context(hive.hiveconf)) - Some(sa.getQB().getTableDesc) - } - - // Check if the query specifies file format or storage handler. - val hasStorageSpec = desc match { - case Some(crtTbl) => - crtTbl != null && (crtTbl.getSerName != null || crtTbl.getStorageHandler != null) - case None => false - } - - if (hive.convertCTAS && !hasStorageSpec) { + case CreateTableAsSelect(desc, child, allowExisting) => + if (hive.convertCTAS && !desc.serde.isDefined) { // Do the conversion when spark.sql.hive.convertCTAS is true and the query // does not specify any storage format (file format and storage handler). 
- if (dbName.isDefined) { + if (desc.specifiedDatabase.isDefined) { throw new AnalysisException( "Cannot specify database name in a CTAS statement " + "when spark.sql.hive.convertCTAS is set to true.") @@ -641,7 +434,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTableUsingAsSelect( - tblName, + desc.name, hive.conf.defaultDataSourceName, temporary = false, mode, @@ -650,19 +443,19 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with ) } else { execution.CreateTableAsSelect( - databaseName, - tableName, + desc.copy( + specifiedDatabase = Option(desc.specifiedDatabase.getOrElse(client.currentDatabase))), child, - allowExisting, - desc) + allowExisting) } case p: LogicalPlan if p.resolved => p - case p @ CreateTableAsSelect(db, tableName, child, allowExisting, None) => - val (dbName, tblName) = processDatabaseAndTableName(db, tableName) + case p @ CreateTableAsSelect(desc, child, allowExisting) => + val (dbName, tblName) = processDatabaseAndTableName(desc.database, desc.name) + if (hive.convertCTAS) { - if (dbName.isDefined) { + if (desc.specifiedDatabase.isDefined) { throw new AnalysisException( "Cannot specify database name in a CTAS statement " + "when spark.sql.hive.convertCTAS is set to true.") @@ -678,13 +471,10 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with child ) } else { - val databaseName = dbName.getOrElse(hive.sessionState.getCurrentDatabase) execution.CreateTableAsSelect( - databaseName, - tableName, + desc, child, - allowExisting, - None) + allowExisting) } } } @@ -767,7 +557,7 @@ private[hive] case class InsertIntoHiveTable( private[hive] case class MetastoreRelation (databaseName: String, tableName: String, alias: Option[String]) - (val table: TTable, val partitions: Seq[TPartition]) + (val table: HiveTable) (@transient sqlContext: SQLContext) extends LeafNode with MultiInstanceRelation { @@ -786,16 +576,63 @@ private[hive] case class MetastoreRelation Objects.hashCode(databaseName, tableName, alias, output) } - // TODO: Can we use org.apache.hadoop.hive.ql.metadata.Table as the type of table and - // use org.apache.hadoop.hive.ql.metadata.Partition as the type of elements of partitions. - // Right now, using org.apache.hadoop.hive.ql.metadata.Table and - // org.apache.hadoop.hive.ql.metadata.Partition will cause a NotSerializableException - // which indicates the SerDe we used is not Serializable. + @transient val hiveQlTable: Table = { + // We start by constructing an API table as Hive performs several important transformations + // internally when converting an API table to a QL table. 
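
In SQL terms, the rewritten resolution rule above is expected to behave roughly as sketched below (illustrative table names; the second statement should keep the Hive path because the RCFILE branch of the parser fills in a SerDe):

    // Assumes an existing hiveContext and a src table, as in the Hive test fixtures.
    hiveContext.setConf("spark.sql.hive.convertCTAS", "true")

    // No storage format given -> rewritten to CreateTableUsingAsSelect on the default data source.
    hiveContext.sql("CREATE TABLE ctas_default AS SELECT key, value FROM src")

    // A storage clause sets tableDesc.serde during parsing -> handled as a regular Hive CTAS.
    hiveContext.sql("CREATE TABLE ctas_rcfile STORED AS RCFILE AS SELECT key, value FROM src")
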
+ val tTable = new org.apache.hadoop.hive.metastore.api.Table() + tTable.setTableName(table.name) + tTable.setDbName(table.database) + + val tableParameters = new java.util.HashMap[String, String]() + tTable.setParameters(tableParameters) + table.properties.foreach { case (k, v) => tableParameters.put(k, v) } + + tTable.setTableType(table.tableType.name) + + val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor() + tTable.setSd(sd) + sd.setCols(table.schema.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) + tTable.setPartitionKeys( + table.partitionColumns.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) + + table.location.foreach(sd.setLocation) + table.inputFormat.foreach(sd.setInputFormat) + table.outputFormat.foreach(sd.setOutputFormat) + + val serdeInfo = new org.apache.hadoop.hive.metastore.api.SerDeInfo + sd.setSerdeInfo(serdeInfo) + table.serde.foreach(serdeInfo.setSerializationLib) + val serdeParameters = new java.util.HashMap[String, String]() + serdeInfo.setParameters(serdeParameters) + table.serdeProperties.foreach { case (k, v) => serdeParameters.put(k, v) } + + new Table(tTable) + } + + @transient val hiveQlPartitions: Seq[Partition] = table.getAllPartitions.map { p => + val tPartition = new org.apache.hadoop.hive.metastore.api.Partition + tPartition.setDbName(databaseName) + tPartition.setTableName(tableName) + tPartition.setValues(p.values) + + val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor() + tPartition.setSd(sd) + sd.setCols(table.schema.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) + + sd.setLocation(p.storage.location) + sd.setInputFormat(p.storage.inputFormat) + sd.setOutputFormat(p.storage.outputFormat) + + val serdeInfo = new org.apache.hadoop.hive.metastore.api.SerDeInfo + sd.setSerdeInfo(serdeInfo) + serdeInfo.setSerializationLib(p.storage.serde) - @transient val hiveQlTable: Table = new Table(table) + val serdeParameters = new java.util.HashMap[String, String]() + serdeInfo.setParameters(serdeParameters) + table.serdeProperties.foreach { case (k, v) => serdeParameters.put(k, v) } + p.storage.serdeProperties.foreach { case (k, v) => serdeParameters.put(k, v) } - @transient val hiveQlPartitions: Seq[Partition] = partitions.map { p => - new Partition(hiveQlTable, p) + new Partition(hiveQlTable, tPartition) } @transient override lazy val statistics: Statistics = Statistics( @@ -865,7 +702,7 @@ private[hive] case class MetastoreRelation val columnOrdinals = AttributeMap(attributes.zipWithIndex) override def newInstance(): MetastoreRelation = { - MetastoreRelation(databaseName, tableName, alias)(table, partitions)(sqlContext) + MetastoreRelation(databaseName, tableName, alias)(table)(sqlContext) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 6176aee25e7a4..f30b196734c40 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.execution.ExplainCommand import org.apache.spark.sql.sources.DescribeCommand +import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.hive.execution.{HiveNativeCommand, DropTable, AnalyzeTable, HiveScriptIOSchema} import org.apache.spark.sql.types._ import org.apache.spark.util.random.RandomSampler @@ -50,7 +51,19 @@ import 
scala.collection.JavaConversions._ * back for Hive to execute natively. Will be replaced with a native command that contains the * cmd string. */ -private[hive] case object NativePlaceholder extends Command +private[hive] case object NativePlaceholder extends LogicalPlan { + override def children: Seq[LogicalPlan] = Seq.empty + override def output: Seq[Attribute] = Seq.empty +} + +case class CreateTableAsSelect( + tableDesc: HiveTable, + child: LogicalPlan, + allowExisting: Boolean) extends UnaryNode with Command { + + override def output: Seq[Attribute] = Seq.empty[Attribute] + override lazy val resolved: Boolean = tableDesc.specifiedDatabase.isDefined && childrenResolved +} /** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */ private[hive] object HiveQl { @@ -78,16 +91,16 @@ private[hive] object HiveQl { "TOK_ALTERVIEW_DROPPARTS", "TOK_ALTERVIEW_PROPERTIES", "TOK_ALTERVIEW_RENAME", - + "TOK_CREATEDATABASE", "TOK_CREATEFUNCTION", "TOK_CREATEINDEX", "TOK_CREATEROLE", "TOK_CREATEVIEW", - + "TOK_DESCDATABASE", "TOK_DESCFUNCTION", - + "TOK_DROPDATABASE", "TOK_DROPFUNCTION", "TOK_DROPINDEX", @@ -95,22 +108,22 @@ private[hive] object HiveQl { "TOK_DROPTABLE_PROPERTIES", "TOK_DROPVIEW", "TOK_DROPVIEW_PROPERTIES", - + "TOK_EXPORT", - + "TOK_GRANT", "TOK_GRANT_ROLE", - + "TOK_IMPORT", - + "TOK_LOAD", - + "TOK_LOCKTABLE", - + "TOK_MSCK", - + "TOK_REVOKE", - + "TOK_SHOW_COMPACTIONS", "TOK_SHOW_CREATETABLE", "TOK_SHOW_GRANT", @@ -127,9 +140,9 @@ private[hive] object HiveQl { "TOK_SHOWINDEXES", "TOK_SHOWLOCKS", "TOK_SHOWPARTITIONS", - + "TOK_SWITCHDATABASE", - + "TOK_UNLOCKTABLE" ) @@ -259,6 +272,7 @@ private[hive] object HiveQl { case otherMessage => throw new AnalysisException(otherMessage) } + case e: MatchError => throw e case e: Exception => throw new AnalysisException(e.getMessage) case e: NotImplementedError => @@ -272,14 +286,6 @@ private[hive] object HiveQl { } } - /** Creates LogicalPlan for a given VIEW */ - def createPlanForView(view: Table, alias: Option[String]): Subquery = alias match { - // because hive use things like `_c0` to build the expanded text - // currently we cannot support view from "create view v1(c1) as ..." - case None => Subquery(view.getTableName, createPlan(view.getViewExpandedText)) - case Some(aliasText) => Subquery(aliasText, createPlan(view.getViewExpandedText)) - } - def parseDdl(ddl: String): Seq[Attribute] = { val tree = try { @@ -453,6 +459,14 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C (keys, bitmasks) } + protected def getProperties(node: Node): Seq[(String, String)] = node match { + case Token("TOK_TABLEPROPLIST", list) => + list.map { + case Token("TOK_TABLEPROPERTY", Token(key, Nil) :: Token(value, Nil) :: Nil) => + (unquoteString(key) -> unquoteString(value)) + } + } + protected def nodeToPlan(node: Node): LogicalPlan = node match { // Special drop table that also uncaches. 
case Token("TOK_DROPTABLE", @@ -562,7 +576,62 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C children) val (db, tableName) = extractDbNameTableName(tableNameParts) - CreateTableAsSelect(db, tableName, nodeToPlan(query), allowExisting != None, Some(node)) + var tableDesc = + HiveTable( + specifiedDatabase = db, + name = tableName, + schema = Seq.empty, + partitionColumns = Seq.empty, + properties = Map.empty, + serdeProperties = Map.empty, + tableType = ManagedTable, + location = None, + inputFormat = None, + outputFormat = None, + serde = None) + + // TODO: Handle all the cases here... + children.foreach { + case Token("TOK_TBLRCFILE", Nil) => + import org.apache.hadoop.hive.ql.io.{RCFileInputFormat, RCFileOutputFormat} + tableDesc = tableDesc.copy( + outputFormat = Option(classOf[RCFileOutputFormat].getName), + inputFormat = Option(classOf[RCFileInputFormat[_, _]].getName)) + + if (tableDesc.serde.isEmpty) { + tableDesc = tableDesc.copy( + serde = Option("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) + } + case Token("TOK_TBLORCFILE", Nil) => + tableDesc = tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"), + serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + + case Token("TOK_TBLPARQUETFILE", Nil) => + tableDesc = tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), + serde = Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + + case Token("TOK_TABLESERIALIZER", + Token("TOK_SERDENAME", Token(serdeName, Nil) :: otherProps) :: Nil) => + tableDesc = tableDesc.copy(serde = Option(unquoteString(serdeName))) + + otherProps match { + case Token("TOK_TABLEPROPERTIES", list :: Nil) :: Nil => + tableDesc = tableDesc.copy( + serdeProperties = tableDesc.serdeProperties ++ getProperties(list)) + case Nil => + } + + case Token("TOK_TABLEPROPERTIES", list :: Nil) => + tableDesc = tableDesc.copy(properties = tableDesc.properties ++ getProperties(list)) + + case _ => + } + + CreateTableAsSelect(tableDesc, nodeToPlan(query), allowExisting != None) // If its not a "CREATE TABLE AS" like above then just pass it back to hive as a native command. 
case Token("TOK_CREATETABLE", _) => NativePlaceholder @@ -759,7 +828,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token("TOK_CUBE_GROUPBY", children) => Cube(children.map(nodeToExpr), withLateralView, selectExpressions) case _ => sys.error("Expect WITH CUBE") - }), + }), Some(Project(selectExpressions, withLateralView))).flatten.head } @@ -1077,6 +1146,15 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C } protected val escapedIdentifier = "`([^`]+)`".r + protected val doubleQuotedString = "\"([^\"]+)\"".r + protected val singleQuotedString = "'([^']+)'".r + + protected def unquoteString(str: String) = str match { + case singleQuotedString(s) => s + case doubleQuotedString(s) => s + case other => other + } + /** Strips backticks from ident if present */ protected def cleanIdentifier(ident: String): String = ident match { case escapedIdentifier(i) => i diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index e556c74ffb015..b69312f0f8717 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -32,6 +32,7 @@ import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf} import org.apache.spark.SerializableWritable import org.apache.spark.broadcast.Broadcast +import org.apache.spark.Logging import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types.DateUtils @@ -57,7 +58,7 @@ class HadoopTableReader( @transient relation: MetastoreRelation, @transient sc: HiveContext, @transient hiveExtraConf: HiveConf) - extends TableReader { + extends TableReader with Logging { // Hadoop honors "mapred.map.tasks" as hint, but will ignore when mapred.job.tracker is "local". // https://hadoop.apache.org/docs/r1.0.4/mapred-default.html @@ -78,7 +79,7 @@ class HadoopTableReader( makeRDDForTable( hiveTable, Class.forName( - relation.tableDesc.getSerdeClassName, true, sc.sessionState.getConf.getClassLoader) + relation.tableDesc.getSerdeClassName, true, Utils.getSparkClassLoader) .asInstanceOf[Class[Deserializer]], filterOpt = None) @@ -145,7 +146,7 @@ class HadoopTableReader( partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]], filterOpt: Option[PathFilter]): RDD[Row] = { - + // SPARK-5068:get FileStatus and do the filtering locally when the path is not exists def verifyPartitionPath( partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]]): @@ -288,7 +289,7 @@ class HadoopTableReader( } } -private[hive] object HadoopTableReader extends HiveInspectors { +private[hive] object HadoopTableReader extends HiveInspectors with Logging { /** * Curried. After given an argument for 'path', the resulting JobConf => Unit closure is used to * instantiate a HadoopRDD. 
@@ -329,6 +330,8 @@ private[hive] object HadoopTableReader extends HiveInspectors { tableDeser.getObjectInspector).asInstanceOf[StructObjectInspector] } + logDebug(soi.toString) + val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { case (attr, ordinal) => soi.getStructFieldRef(attr.name) -> ordinal }.unzip diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala index a863aa77cb7e0..0a1d761a52f88 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala @@ -17,30 +17,35 @@ package org.apache.spark.sql.hive.client +import java.io.PrintStream +import java.util.{Map => JMap} + import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchTableException} -case class HiveDatabase( +private[hive] case class HiveDatabase( name: String, location: String) -abstract class TableType { val name: String } -case object ExternalTable extends TableType { override val name = "EXTERNAL_TABLE" } -case object IndexTable extends TableType { override val name = "INDEX_TABLE" } -case object ManagedTable extends TableType { override val name = "MANAGED_TABLE" } -case object VirtualView extends TableType { override val name = "VIRTUAL_VIEW" } +private[hive] abstract class TableType { val name: String } +private[hive] case object ExternalTable extends TableType { override val name = "EXTERNAL_TABLE" } +private[hive] case object IndexTable extends TableType { override val name = "INDEX_TABLE" } +private[hive] case object ManagedTable extends TableType { override val name = "MANAGED_TABLE" } +private[hive] case object VirtualView extends TableType { override val name = "VIRTUAL_VIEW" } -case class HiveStorageDescriptor( +// TODO: Use this for Tables and Partitions +private[hive] case class HiveStorageDescriptor( location: String, inputFormat: String, outputFormat: String, - serde: String) + serde: String, + serdeProperties: Map[String, String]) -case class HivePartition( +private[hive] case class HivePartition( values: Seq[String], storage: HiveStorageDescriptor) -case class HiveColumn(name: String, hiveType: String, comment: String) -case class HiveTable( +private[hive] case class HiveColumn(name: String, hiveType: String, comment: String) +private[hive] case class HiveTable( specifiedDatabase: Option[String], name: String, schema: Seq[HiveColumn], @@ -51,7 +56,8 @@ case class HiveTable( location: Option[String] = None, inputFormat: Option[String] = None, outputFormat: Option[String] = None, - serde: Option[String] = None) { + serde: Option[String] = None, + viewText: Option[String] = None) { @transient private[client] var client: ClientInterface = _ @@ -76,13 +82,17 @@ case class HiveTable( * internal and external classloaders for a given version of Hive and thus must expose only * shared classes. */ -trait ClientInterface { +private[hive] trait ClientInterface { /** * Runs a HiveQL command using Hive, returning the results as a list of strings. Each row will * result in one string. */ def runSqlHive(sql: String): Seq[String] + def setOut(stream: PrintStream): Unit + def setInfo(stream: PrintStream): Unit + def setError(stream: PrintStream): Unit + /** Returns the names of all tables in the given database. */ def listTables(dbName: String): Seq[String] @@ -114,6 +124,11 @@ trait ClientInterface { /** Creates a new database with the given name. 
*/ def createDatabase(database: HiveDatabase): Unit + /** Returns the specified paritition or None if it does not exist. */ + def getPartitionOption( + hTable: HiveTable, + partitionSpec: JMap[String, String]): Option[HivePartition] + /** Returns all partitions for the given table. */ def getAllPartitions(hTable: HiveTable): Seq[HivePartition] diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala index ea52fea037f1f..6bca9d0179fe3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.client import java.io.{BufferedReader, InputStreamReader, File, PrintStream} import java.net.URI -import java.util.{ArrayList => JArrayList} +import java.util.{ArrayList => JArrayList, Map => JMap, List => JList, Set => JSet} import scala.collection.JavaConversions._ import scala.language.reflectiveCalls @@ -27,6 +27,7 @@ import scala.language.reflectiveCalls import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.metastore.api.Database import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.metastore.TableType import org.apache.hadoop.hive.metastore.api import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.ql.metadata @@ -54,19 +55,13 @@ import org.apache.spark.sql.execution.QueryExecutionException * @param config a collection of configuration options that will be added to the hive conf before * opening the hive client. */ -class ClientWrapper( +private[hive] class ClientWrapper( version: HiveVersion, config: Map[String, String]) extends ClientInterface with Logging with ReflectionMagic { - private val conf = new HiveConf(classOf[SessionState]) - config.foreach { case (k, v) => - logDebug(s"Hive Config: $k=$v") - conf.set(k, v) - } - // Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur. private val outputBuffer = new java.io.OutputStream { var pos: Int = 0 @@ -99,17 +94,31 @@ class ClientWrapper( val original = Thread.currentThread().getContextClassLoader Thread.currentThread().setContextClassLoader(getClass.getClassLoader) val ret = try { - val newState = new SessionState(conf) - SessionState.start(newState) - newState.out = new PrintStream(outputBuffer, true, "UTF-8") - newState.err = new PrintStream(outputBuffer, true, "UTF-8") - newState + val oldState = SessionState.get() + if (oldState == null) { + val initialConf = new HiveConf(classOf[SessionState]) + config.foreach { case (k, v) => + logDebug(s"Hive Config: $k=$v") + initialConf.set(k, v) + } + val newState = new SessionState(initialConf) + SessionState.start(newState) + newState.out = new PrintStream(outputBuffer, true, "UTF-8") + newState.err = new PrintStream(outputBuffer, true, "UTF-8") + newState + } else { + oldState + } } finally { Thread.currentThread().setContextClassLoader(original) } ret } + /** Returns the configuration for the current session. 
*/ + def conf: HiveConf = SessionState.get().getConf + + // TODO: should be a def?s private val client = Hive.get(conf) /** @@ -133,6 +142,18 @@ class ClientWrapper( ret } + def setOut(stream: PrintStream): Unit = withHiveState { + state.out = stream + } + + def setInfo(stream: PrintStream): Unit = withHiveState { + state.info = stream + } + + def setError(stream: PrintStream): Unit = withHiveState { + state.err = stream + } + override def currentDatabase: String = withHiveState { state.getCurrentDatabase } @@ -171,14 +192,20 @@ class ClientWrapper( partitionColumns = h.getPartCols.map(f => HiveColumn(f.getName, f.getType, f.getComment)), properties = h.getParameters.toMap, serdeProperties = h.getTTable.getSd.getSerdeInfo.getParameters.toMap, - tableType = ManagedTable, // TODO + tableType = h.getTableType match { + case TableType.MANAGED_TABLE => ManagedTable + case TableType.EXTERNAL_TABLE => ExternalTable + case TableType.VIRTUAL_VIEW => VirtualView + case TableType.INDEX_TABLE => IndexTable + }, location = version match { case hive.v12 => Option(h.call[URI]("getDataLocation")).map(_.toString) case hive.v13 => Option(h.call[Path]("getDataLocation")).map(_.toString) }, inputFormat = Option(h.getInputFormatClass).map(_.getName), outputFormat = Option(h.getOutputFormatClass).map(_.getName), - serde = Option(h.getSerializationLib)).withClient(this) + serde = Option(h.getSerializationLib), + viewText = Option(h.getViewExpandedText)).withClient(this) } converted } @@ -223,27 +250,40 @@ class ClientWrapper( client.alterTable(table.qualifiedName, qlTable) } + private def toHivePartition(partition: metadata.Partition): HivePartition = { + val apiPartition = partition.getTPartition + HivePartition( + values = Option(apiPartition.getValues).map(_.toSeq).getOrElse(Seq.empty), + storage = HiveStorageDescriptor( + location = apiPartition.getSd.getLocation, + inputFormat = apiPartition.getSd.getInputFormat, + outputFormat = apiPartition.getSd.getOutputFormat, + serde = apiPartition.getSd.getSerdeInfo.getSerializationLib, + serdeProperties = apiPartition.getSd.getSerdeInfo.getParameters.toMap)) + } + + override def getPartitionOption( + table: HiveTable, + partitionSpec: JMap[String, String]): Option[HivePartition] = withHiveState { + + val qlTable = toQlTable(table) + val qlPartition = client.getPartition(qlTable, partitionSpec, false) + Option(qlPartition).map(toHivePartition) + } + override def getAllPartitions(hTable: HiveTable): Seq[HivePartition] = withHiveState { val qlTable = toQlTable(hTable) val qlPartitions = version match { case hive.v12 => - client.call[metadata.Table, Set[metadata.Partition]]("getAllPartitionsForPruner", qlTable) + client.call[metadata.Table, JSet[metadata.Partition]]("getAllPartitionsForPruner", qlTable) case hive.v13 => - client.call[metadata.Table, Set[metadata.Partition]]("getAllPartitionsOf", qlTable) + client.call[metadata.Table, JSet[metadata.Partition]]("getAllPartitionsOf", qlTable) } - qlPartitions.map(_.getTPartition).map { p => - HivePartition( - values = Option(p.getValues).map(_.toSeq).getOrElse(Seq.empty), - storage = HiveStorageDescriptor( - location = p.getSd.getLocation, - inputFormat = p.getSd.getInputFormat, - outputFormat = p.getSd.getOutputFormat, - serde = p.getSd.getSerdeInfo.getSerializationLib)) - }.toSeq + qlPartitions.toSeq.map(toHivePartition) } override def listTables(dbName: String): Seq[String] = withHiveState { - client.getAllTables + client.getAllTables(dbName) } /** @@ -267,11 +307,12 @@ class ClientWrapper( try { val cmd_trimmed: String 
= cmd.trim() val tokens: Array[String] = cmd_trimmed.split("\\s+") + // The remainder of the command. val cmd_1: String = cmd_trimmed.substring(tokens(0).length()).trim() val proc: CommandProcessor = version match { case hive.v12 => classOf[CommandProcessorFactory] - .callStatic[String, HiveConf, CommandProcessor]("get", cmd_1, conf) + .callStatic[String, HiveConf, CommandProcessor]("get", tokens(0), conf) case hive.v13 => classOf[CommandProcessorFactory] .callStatic[Array[String], HiveConf, CommandProcessor]("get", Array(tokens(0)), conf) @@ -294,7 +335,7 @@ class ClientWrapper( res.toSeq case hive.v13 => val res = new JArrayList[Object] - driver.call[JArrayList[Object], Boolean]("getResults", res) + driver.call[JList[Object], Boolean]("getResults", res) res.map { r => r match { case s: String => s diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 710dbca6e3c66..7f94c93ba49c1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.client import java.io.File -import java.net.URLClassLoader +import java.net.{URL, URLClassLoader} import java.util import scala.language.reflectiveCalls @@ -30,9 +30,10 @@ import org.apache.spark.Logging import org.apache.spark.deploy.SparkSubmitUtils import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.hive.HiveContext /** Factory for `IsolatedClientLoader` with specific versions of hive. */ -object IsolatedClientLoader { +private[hive] object IsolatedClientLoader { /** * Creates isolated Hive client loaders by downloading the requested version from maven. */ @@ -49,7 +50,7 @@ object IsolatedClientLoader { case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13 } - private def downloadVersion(version: HiveVersion): Seq[File] = { + private def downloadVersion(version: HiveVersion): Seq[URL] = { val hiveArtifacts = (Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") ++ (if (version.hasBuiltinsJar) "hive-builtins" :: Nil else Nil)) @@ -72,10 +73,10 @@ object IsolatedClientLoader { tempDir.mkdir() allFiles.foreach(f => FileUtils.copyFileToDirectory(f, tempDir)) - tempDir.listFiles() + tempDir.listFiles().map(_.toURL) } - private def resolvedVersions = new scala.collection.mutable.HashMap[HiveVersion, Seq[File]] + private def resolvedVersions = new scala.collection.mutable.HashMap[HiveVersion, Seq[URL]] } /** @@ -99,9 +100,9 @@ object IsolatedClientLoader { * @param baseClassLoader The spark classloader that is used to load shared classes. * */ -class IsolatedClientLoader( +private[hive] class IsolatedClientLoader( val version: HiveVersion, - val execJars: Seq[File] = Seq.empty, + val execJars: Seq[URL] = Seq.empty, val config: Map[String, String] = Map.empty, val isolationOn: Boolean = true, val rootClassLoader: ClassLoader = ClassLoader.getSystemClassLoader.getParent.getParent, @@ -112,7 +113,7 @@ class IsolatedClientLoader( assert(Try(baseClassLoader.loadClass("org.apache.hive.HiveConf")).isFailure) /** All jars used by the hive specific classloader. 
*/ - protected def allJars = execJars.map(_.toURI.toURL).toArray + protected def allJars = execJars.toArray protected def isSharedClass(name: String): Boolean = name.contains("slf4j") || @@ -166,6 +167,12 @@ class IsolatedClientLoader( .getConstructors.head .newInstance(version, config) .asInstanceOf[ClientInterface] + } catch { + case ReflectionException(cnf: NoClassDefFoundError) => + throw new ClassNotFoundException( + s"$cnf when creating Hive client using classpath: ${execJars.mkString(", ")}\n" + + "Please make sure that jars for your version of hive and hadoop are included in the " + + s"paths passed to ${HiveContext.HIVE_METASTORE_JARS}.") } finally { Thread.currentThread.setContextClassLoader(baseClassLoader) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala index 90d03049356b5..c600b158c5460 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala @@ -19,6 +19,14 @@ package org.apache.spark.sql.hive.client import scala.reflect._ +/** Unwraps reflection exceptions. */ +private[client] object ReflectionException { + def unapply(a: Throwable): Option[Throwable] = a match { + case ite: java.lang.reflect.InvocationTargetException => Option(ite.getCause) + case _ => None + } +} + /** * Provides implicit functions on any object for calling methods reflectively. */ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala index 76a1965f3cb25..91e6ac4032204 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala @@ -24,8 +24,8 @@ import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.execution.RunnableCommand -import org.apache.spark.sql.hive.HiveContext -import org.apache.spark.sql.hive.MetastoreRelation +import org.apache.spark.sql.hive.client.{HiveTable, HiveColumn} +import org.apache.spark.sql.hive.{HiveContext, MetastoreRelation, HiveMetastoreTypes} /** * Create table and insert the query result into it. 
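
A sketch of driving the isolated loader introduced above directly, resolving Hive 0.13.1 from maven and asking the resulting client for its tables. This is illustrative only: it assumes the code sits in the org.apache.spark.sql.hive package (the loader and client types are now private[hive]) and that maven is reachable.

    import org.apache.spark.sql.hive.client.{ClientInterface, IsolatedClientLoader}

    // Downloads the requested Hive version and builds a client behind the isolating classloader.
    val loader = IsolatedClientLoader.forVersion("0.13.1", Map.empty)
    val client: ClientInterface = loader.client
    client.listTables(client.currentDatabase).foreach(println)
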
@@ -39,17 +39,34 @@ import org.apache.spark.sql.hive.MetastoreRelation */ private[hive] case class CreateTableAsSelect( - database: String, - tableName: String, + tableDesc: HiveTable, query: LogicalPlan, - allowExisting: Boolean, - desc: Option[CreateTableDesc]) extends RunnableCommand { + allowExisting: Boolean) + extends RunnableCommand { + + def database: String = tableDesc.database + def tableName: String = tableDesc.name override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] lazy val metastoreRelation: MetastoreRelation = { - // Create Hive Table - hiveContext.catalog.createTable(database, tableName, query.output, allowExisting, desc) + import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe + import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + import org.apache.hadoop.io.Text + import org.apache.hadoop.mapred.TextInputFormat + + val withSchema = + tableDesc.copy( + schema = + query.output.map(c => + HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)), + inputFormat = + tableDesc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)), + outputFormat = + tableDesc.outputFormat + .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)), + serde = tableDesc.serde.orElse(Some(classOf[LazySimpleSerDe].getName()))) + hiveContext.catalog.client.createTable(withSchema) // Get the Metastore Relation hiveContext.catalog.lookupRelation(Seq(database, tableName), None) match { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 89995a91b1a92..de8954d5dec99 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -200,9 +200,7 @@ case class InsertIntoHiveTable( orderedPartitionSpec.put(entry.getName,partitionSpec.get(entry.getName).getOrElse("")) } val partVals = MetaStoreUtils.getPvals(table.hiveQlTable.getPartCols, partitionSpec) - catalog.synchronized { - catalog.client.validatePartitionNameCharacters(partVals) - } + // inheritTableSpecs is set to true. It should be set to false for a IMPORT query // which is currently considered as a Hive native command. 
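
The storage defaulting applied in the CTAS command above amounts to the helper sketched below: any format the user did not specify falls back to plain text with LazySimpleSerDe. Nothing here is part of the patch itself; desc stands for the parsed HiveTable description.

    import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
    import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    import org.apache.hadoop.io.Text
    import org.apache.hadoop.mapred.TextInputFormat
    import org.apache.spark.sql.hive.client.HiveTable

    def withStorageDefaults(desc: HiveTable): HiveTable = desc.copy(
      inputFormat = desc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
      outputFormat = desc.outputFormat
        .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
      serde = desc.serde.orElse(Some(classOf[LazySimpleSerDe].getName)))
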
val inheritTableSpecs = true @@ -211,7 +209,7 @@ case class InsertIntoHiveTable( if (numDynamicPartitions > 0) { catalog.synchronized { catalog.client.loadDynamicPartitions( - outputPath, + outputPath.toString, qualifiedTableName, orderedPartitionSpec, overwrite, @@ -224,31 +222,28 @@ case class InsertIntoHiveTable( // ifNotExists is only valid with static partition, refer to // https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML#LanguageManualDML-InsertingdataintoHiveTablesfromqueries // scalastyle:on - val oldPart = catalog.synchronized { - catalog.client.getPartition( - catalog.client.getTable(qualifiedTableName), partitionSpec, false) - } - if (oldPart == null || !ifNotExists) { - catalog.synchronized { + val oldPart = + catalog.client.getPartitionOption( + catalog.client.getTable(table.databaseName, table.tableName), + partitionSpec) + + if (oldPart.isEmpty || !ifNotExists) { catalog.client.loadPartition( - outputPath, + outputPath.toString, qualifiedTableName, orderedPartitionSpec, overwrite, holdDDLTime, inheritTableSpecs, isSkewedStoreAsSubdir) - } } } } else { - catalog.synchronized { - catalog.client.loadTable( - outputPath, - qualifiedTableName, - overwrite, - holdDDLTime) - } + catalog.client.loadTable( + outputPath.toString, // TODO: URI + qualifiedTableName, + overwrite, + holdDDLTime) } // Invalidate the cache. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index a40a1e53117cd..abab1a223a43a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * Analyzes the given table in the current database to generate statistics, which will be @@ -84,8 +85,20 @@ case class AddJar(path: String) extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] + val currentClassLoader = Utils.getContextOrSparkClassLoader + + // Add jar to current context + val jarURL = new java.io.File(path).toURL + val newClassLoader = new java.net.URLClassLoader(Array(jarURL), currentClassLoader) + Thread.currentThread.setContextClassLoader(newClassLoader) + org.apache.hadoop.hive.ql.metadata.Hive.get().getConf().setClassLoader(newClassLoader) + + // Add jar to isolated hive classloader hiveContext.runSqlHive(s"ADD JAR $path") + + // Add jar to executors hiveContext.sparkContext.addJar(path) + Seq(Row(0)) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index ca84b43a998b8..1f40a5340c2ce 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.test import java.io.File import java.util.{Set => JavaSet} +import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.ql.exec.FunctionRegistry import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContainerOutputFormat} import org.apache.hadoop.hive.ql.metadata.Table @@ -62,6 +63,8 @@ object TestHive class 
TestHiveContext(sc: SparkContext) extends HiveContext(sc) { self => + import HiveContext._ + // By clearing the port we force Spark to pick a new one. This allows us to rerun tests // without restarting the JVM. System.clearProperty("spark.hostPort") @@ -70,24 +73,16 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { hiveconf.set("hive.plan.serialization.format", "javaXML") lazy val warehousePath = Utils.createTempDir() - lazy val metastorePath = Utils.createTempDir() /** Sets up the system initially or after a RESET command */ - protected def configure(): Unit = { - warehousePath.delete() - metastorePath.delete() - setConf("javax.jdo.option.ConnectionURL", - s"jdbc:derby:;databaseName=$metastorePath;create=true") - setConf("hive.metastore.warehouse.dir", warehousePath.toString) - } + protected override def configure(): Map[String, String] = + newTemporaryConfiguration() ++ Map("hive.metastore.warehouse.dir" -> warehousePath.toString) val testTempDir = Utils.createTempDir() // For some hive test case which contain ${system:test.tmp.dir} System.setProperty("test.tmp.dir", testTempDir.getCanonicalPath) - configure() // Must be called before initializing the catalog below. - /** The location of the compiled hive distribution */ lazy val hiveHome = envVarToFile("HIVE_HOME") /** The location of the hive source code. */ @@ -195,6 +190,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { * A list of test tables and the DDL required to initialize them. A test table is loaded on * demand when a query are run against it. */ + @transient lazy val testTables = new mutable.HashMap[String, TestTable]() def registerTestTable(testTable: TestTable): Unit = { @@ -204,6 +200,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { // The test tables that are defined in the Hive QTestUtil. // /itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java // https://github.com/apache/hive/blob/branch-0.13/data/scripts/q_test_init.sql + @transient val hiveQTestUtilTables = Seq( TestTable("src", "CREATE TABLE src (key INT, value STRING)".cmd, @@ -236,16 +233,18 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { import org.apache.hadoop.mapred.{SequenceFileInputFormat, SequenceFileOutputFormat} import org.apache.thrift.protocol.TBinaryProtocol - val srcThrift = new Table("default", "src_thrift") - srcThrift.setFields(Nil) - srcThrift.setInputFormatClass(classOf[SequenceFileInputFormat[_,_]].getName) - // In Hive, SequenceFileOutputFormat will be substituted by HiveSequenceFileOutputFormat. 
- srcThrift.setOutputFormatClass(classOf[SequenceFileOutputFormat[_,_]].getName) - srcThrift.setSerializationLib(classOf[ThriftDeserializer].getName) - srcThrift.setSerdeParam("serialization.class", classOf[Complex].getName) - srcThrift.setSerdeParam("serialization.format", classOf[TBinaryProtocol].getName) - catalog.client.createTable(srcThrift) - + runSqlHive( + s""" + |CREATE TABLE src_thrift(fake INT) + |ROW FORMAT SERDE '${classOf[ThriftDeserializer].getName}' + |WITH SERDEPROPERTIES( + | 'serialization.class'='${classOf[Complex].getName}', + | 'serialization.format'='${classOf[TBinaryProtocol].getName}' + |) + |STORED AS + |INPUTFORMAT '${classOf[SequenceFileInputFormat[_,_]].getName}' + |OUTPUTFORMAT '${classOf[SequenceFileOutputFormat[_,_]].getName}' + """.stripMargin) runSqlHive( s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/complex.seq")}' INTO TABLE src_thrift") @@ -367,7 +366,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { if (!(loadedTables contains name)) { // Marks the table as loaded first to prevent infinite mutually recursive table loading. loadedTables += name - logInfo(s"Loading test table $name") + logDebug(s"Loading test table $name") val createCmds = testTables.get(name).map(_.commands).getOrElse(sys.error(s"Unknown test table $name")) createCmds.foreach(_()) @@ -384,9 +383,6 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { */ protected val originalUdfs: JavaSet[String] = FunctionRegistry.getFunctionNames - // Database default may not exist in 0.13.1, create it if not exist - HiveShim.createDefaultDBIfNeeded(this) - /** * Resets the test instance by deleting any tables that have been created. * TODO: also clear out UDFs, views, etc. @@ -401,24 +397,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { cacheManager.clearCache() loadedTables.clear() catalog.cachedDataSourceTables.invalidateAll() - catalog.client.getAllTables("default").foreach { t => - logDebug(s"Deleting table $t") - val table = catalog.client.getTable("default", t) - - catalog.client.getIndexes("default", t, 255).foreach { index => - catalog.client.dropIndex("default", t, index.getIndexName, true) - } - - if (!table.isIndexTable) { - catalog.client.dropTable("default", t) - } - } - - catalog.client.getAllDatabases.filterNot(_ == "default").foreach { db => - logDebug(s"Dropping Database: $db") - catalog.client.dropDatabase(db, true, false, true) - } - + catalog.client.reset() catalog.unregisterAllTables() FunctionRegistry.getFunctionNames.filterNot(originalUdfs.contains(_)).foreach { udfName => @@ -429,7 +408,8 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { hiveconf.set("fs.default.name", new File(".").toURI.toString) // It is important that we RESET first as broken hooks that might have been set could break // other sql exec here. - runSqlHive("RESET") + executionHive.runSqlHive("RESET") + metadataHive.runSqlHive("RESET") // For some reason, RESET does not reset the following variables... // https://issues.apache.org/jira/browse/HIVE-9004 runSqlHive("set hive.table.parameters.default=") @@ -437,7 +417,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { runSqlHive("set datanucleus.cache.collections.lazy=true") // Lots of tests fail if we do not change the partition whitelist from the default. 
runSqlHive("set hive.metastore.partition.name.whitelist.pattern=.*") - configure() + + configure().foreach { + case (k, v) => + metadataHive.runSqlHive(s"SET $k=$v") + } runSqlHive("USE default") diff --git a/sql/hive/src/test/resources/log4j.properties b/sql/hive/src/test/resources/log4j.properties index 5bc08062d30eb..92eaf1f2795ba 100644 --- a/sql/hive/src/test/resources/log4j.properties +++ b/sql/hive/src/test/resources/log4j.properties @@ -33,7 +33,7 @@ log4j.appender.FA.layout=org.apache.log4j.PatternLayout log4j.appender.FA.layout.ConversionPattern=%d{HH:mm:ss.SSS} %t %p %c{1}: %m%n # Set the logger level of File Appender to WARN -log4j.appender.FA.Threshold = INFO +log4j.appender.FA.Threshold = DEBUG # Some packages are noisy for no good reason. log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala index d960a30e00738..30f5313d2b812 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala @@ -17,12 +17,11 @@ package org.apache.spark.sql.hive -import java.io.{OutputStream, PrintStream} - import scala.util.Try import org.scalatest.BeforeAndAfter +import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.{AnalysisException, QueryTest} @@ -109,25 +108,6 @@ class ErrorPositionSuite extends QueryTest with BeforeAndAfter { "SELECT 1 + array(1)", "1 + array") } - /** Hive can be very noisy, messing up the output of our tests. */ - private def quietly[A](f: => A): A = { - val origErr = System.err - val origOut = System.out - try { - System.setErr(new PrintStream(new OutputStream { - def write(b: Int) = {} - })) - System.setOut(new PrintStream(new OutputStream { - def write(b: Int) = {} - })) - - f - } finally { - System.setErr(origErr) - System.setOut(origOut) - } - } - /** * Creates a test that checks to see if the error thrown when analyzing a given query includes * the location of the given token in the query string. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 0538aa203c5a0..47c60f651d14c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -31,6 +31,7 @@ import org.apache.hadoop.mapred.InvalidInputException import org.apache.spark.sql._ import org.apache.spark.util.Utils import org.apache.spark.sql.types._ +import org.apache.spark.sql.hive.client.{HiveTable, ManagedTable} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.parquet.ParquetRelation2 @@ -686,16 +687,21 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { test("SPARK-6655 still support a schema stored in spark.sql.sources.schema") { val tableName = "spark6655" val schema = StructType(StructField("int", IntegerType, true) :: Nil) - // Manually create the metadata in metastore. 
- val tbl = new Table("default", tableName) - tbl.setProperty("spark.sql.sources.provider", "json") - tbl.setProperty("spark.sql.sources.schema", schema.json) - tbl.setProperty("EXTERNAL", "FALSE") - tbl.setTableType(TableType.MANAGED_TABLE) - tbl.setSerdeParam("path", catalog.hiveDefaultTableFilePath(tableName)) - catalog.synchronized { - catalog.client.createTable(tbl) - } + + val hiveTable = HiveTable( + specifiedDatabase = Some("default"), + name = tableName, + schema = Seq.empty, + partitionColumns = Seq.empty, + properties = Map( + "spark.sql.sources.provider" -> "json", + "spark.sql.sources.schema" -> schema.json, + "EXTERNAL" -> "FALSE"), + tableType = ManagedTable, + serdeProperties = Map( + "path" -> catalog.hiveDefaultTableFilePath(tableName))) + + catalog.client.createTable(hiveTable) invalidateTable(tableName) val actualSchema = table(tableName).schema diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala index d6ddd539d159d..8afe5459d4f1b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala @@ -26,8 +26,10 @@ import org.apache.spark.sql.hive.test.TestHive class SerializationSuite extends FunSuite { test("[SPARK-5840] HiveContext should be serializable") { - val hiveContext = new HiveContext(TestHive.sparkContext) + val hiveContext = TestHive hiveContext.hiveconf - new JavaSerializer(new SparkConf()).newInstance().serialize(hiveContext) + val serializer = new JavaSerializer(new SparkConf()).newInstance() + val bytes = serializer.serialize(hiveContext) + val deSer = serializer.deserialize[AnyRef](bytes) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 81e77ba257bf1..321dc8d7322b8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -22,9 +22,13 @@ import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.util.Utils import org.scalatest.FunSuite +/** + * A simple set of tests that call the methods of a hive ClientInterface, loading different version + * of hive from maven central. These tests are simple in that they are mostly just testing to make + * sure that reflective calls are not throwing NoSuchMethod error, but the actually functionallity + * is not fully tested. + */ class VersionsSuite extends FunSuite with Logging { - val testType = "derby" - private def buildConf() = { lazy val warehousePath = Utils.createTempDir() lazy val metastorePath = Utils.createTempDir() @@ -50,6 +54,14 @@ class VersionsSuite extends FunSuite with Logging { causes } + private val emptyDir = Utils.createTempDir().getCanonicalPath + + private def partSpec = { + val hashMap = new java.util.LinkedHashMap[String, String] + hashMap.put("key", "1") + hashMap + } + // Its actually pretty easy to mess things up and have all of your tests "pass" by accidentally // connecting to an auto-populated, in-process metastore. Let's make sure we are getting the // versions right by forcing a known compatibility failure. 
@@ -66,10 +78,9 @@ class VersionsSuite extends FunSuite with Logging { private var client: ClientInterface = null versions.foreach { version => - test(s"$version: listTables") { + test(s"$version: create client") { client = null client = IsolatedClientLoader.forVersion(version, buildConf()).client - client.listTables("default") } test(s"$version: createDatabase") { @@ -101,5 +112,64 @@ class VersionsSuite extends FunSuite with Logging { test(s"$version: getTable") { client.getTable("default", "src") } + + test(s"$version: listTables") { + assert(client.listTables("default") === Seq("src")) + } + + test(s"$version: currentDatabase") { + assert(client.currentDatabase === "default") + } + + test(s"$version: getDatabase") { + client.getDatabase("default") + } + + test(s"$version: alterTable") { + client.alterTable(client.getTable("default", "src")) + } + + test(s"$version: set command") { + client.runSqlHive("SET spark.sql.test.key=1") + } + + test(s"$version: create partitioned table DDL") { + client.runSqlHive("CREATE TABLE src_part (value INT) PARTITIONED BY (key INT)") + client.runSqlHive("ALTER TABLE src_part ADD PARTITION (key = '1')") + } + + test(s"$version: getPartitions") { + client.getAllPartitions(client.getTable("default", "src_part")) + } + + test(s"$version: loadPartition") { + client.loadPartition( + emptyDir, + "default.src_part", + partSpec, + false, + false, + false, + false) + } + + test(s"$version: loadTable") { + client.loadTable( + emptyDir, + "src", + false, + false) + } + + test(s"$version: loadDynamicPartitions") { + client.loadDynamicPartitions( + emptyDir, + "default.src_part", + partSpec, + false, + 1, + false, + false) + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index a3eacbd4e3981..9c056e493bfde 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -300,6 +300,8 @@ abstract class HiveComparisonTest val hiveQueries = queryList.map(new TestHive.QueryExecution(_)) // Make sure we can at least parse everything before attempting hive execution. + // Note this must only look at the logical plan as we might not be able to analyze if + // other DDL has not been executed yet. 
hiveQueries.foreach(_.logical) val computedResults = (queryList.zipWithIndex, hiveQueries, hiveCacheFiles).zipped.map { case ((queryString, i), hiveQuery, cachedAnswerFile)=> diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index ac10b173307d8..7d728fe87bda7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -900,7 +900,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { |DROP TABLE IF EXISTS dynamic_part_table; """.stripMargin) - test("Dynamic partition folder layout") { + ignore("Dynamic partition folder layout") { sql("DROP TABLE IF EXISTS dynamic_part_table") sql("CREATE TABLE dynamic_part_table(intcol INT) PARTITIONED BY (partcol1 INT, partcol2 INT)") sql("SET hive.exec.dynamic.partition.mode=nonstrict") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala index 45f10e2fe64aa..de6a41ce5bfcb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala @@ -150,20 +150,21 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter { val (actualScannedColumns, actualPartValues) = plan.collect { case p @ HiveTableScan(columns, relation, _) => val columnNames = columns.map(_.name) - val partValues = p.prunePartitions(relation.hiveQlPartitions).map(_.getValues) + val partValues = if (relation.table.isPartitioned) { + p.prunePartitions(relation.hiveQlPartitions).map(_.getValues) + } else { + Seq.empty + } (columnNames, partValues) }.head assert(actualOutputColumns === expectedOutputColumns, "Output columns mismatch") assert(actualScannedColumns === expectedScannedColumns, "Scanned columns mismatch") - assert( - actualPartValues.length === expectedPartValues.length, - "Partition value count mismatches") + val actualPartitions = actualPartValues.map(_.toSeq.mkString(",")).sorted + val expectedPartitions = expectedPartValues.map(_.mkString(",")).sorted - for ((actual, expected) <- actualPartValues.zip(expectedPartValues)) { - assert(actual sameElements expected, "Partition values mismatch") - } + assert(actualPartitions === expectedPartitions, "Partitions selected do not match") } // Creates a query test to compare query results generated by Hive and Catalyst. From 92f8f803a68e0c16771e9793098c6d76dfdf99af Mon Sep 17 00:00:00 2001 From: Shuo Xiang <shuoxiangpub@gmail.com> Date: Thu, 7 May 2015 20:55:08 -0700 Subject: [PATCH 028/320] [SPARK-7452] [MLLIB] fix bug in topBykey and update test the toArray function of the BoundedPriorityQueue does not necessarily preserve order. Add a counter-example as the test, which would fail the original impl. 
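The root cause above is easy to demonstrate outside of Spark: a heap-backed queue exposes its internal array in heap order, not in priority order. The sketch below is an illustration of that pitfall using Scala's standard `mutable.PriorityQueue` rather than Spark's internal `BoundedPriorityQueue`; the explicit `sorted(ord.reverse)` in the fix is what guarantees the descending order that callers of `topByKey` expect.

```
import scala.collection.mutable

// Illustration only: a heap's toArray does not yield elements in priority order.
object HeapOrderDemo {
  def main(args: Array[String]): Unit = {
    val heap = mutable.PriorityQueue(7, 3, 6, 1, 2)
    // Prints the internal heap layout, which in general is not fully sorted.
    println(heap.toArray.mkString(", "))
    // Sorting explicitly, as the fix does with `_.toArray.sorted(ord.reverse)`,
    // always produces the intended descending order: 7, 6, 3, 2, 1.
    println(heap.toArray.sorted(Ordering[Int].reverse).mkString(", "))
  }
}
```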
Author: Shuo Xiang <shuoxiangpub@gmail.com> Closes #5990 from coderxiang/topbykey-test and squashes the following commits: 98804c9 [Shuo Xiang] fix bug in topBykey and update test --- .../org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala | 2 +- .../apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala index 5af55aaf84802..1b93e2d764c69 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala @@ -46,7 +46,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se combOp = (queue1, queue2) => { queue1 ++= queue2 } - ).mapValues(_.toArray.reverse) // This is an min-heap, so we reverse the order. + ).mapValues(_.toArray.sorted(ord.reverse)) // This is an min-heap, so we reverse the order. } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala index cb8fe4dba96f5..57216e8eb4a55 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala @@ -24,13 +24,14 @@ import org.apache.spark.mllib.rdd.MLPairRDDFunctions._ class MLPairRDDFunctionsSuite extends FunSuite with MLlibTestSparkContext { test("topByKey") { - val topMap = sc.parallelize(Array((1, 1), (1, 2), (3, 2), (3, 7), (5, 1), (3, 5)), 2) - .topByKey(2) + val topMap = sc.parallelize(Array((1, 7), (1, 3), (1, 6), (1, 1), (1, 2), (3, 2), (3, 7), (5, + 1), (3, 5)), 2) + .topByKey(5) .collectAsMap() assert(topMap.size === 3) - assert(topMap(1) === Array(2, 1)) - assert(topMap(3) === Array(7, 5)) + assert(topMap(1) === Array(7, 6, 3, 2, 1)) + assert(topMap(3) === Array(7, 5, 2)) assert(topMap(5) === Array(1)) } } From 3af423c92f117b5dd4dc6832dc50911cedb29abc Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Thu, 7 May 2015 20:59:42 -0700 Subject: [PATCH 029/320] [SPARK-6986] [SQL] Use Serializer2 in more cases. With https://github.com/apache/spark/commit/0a2b15ce43cf6096e1a7ae060b7c8a4010ce3b92, the serialization stream and deserialization stream has enough information to determine it is handling a key-value pari, a key, or a value. It is safe to use `SparkSqlSerializer2` in more cases. Author: Yin Huai <yhuai@databricks.com> Closes #5849 from yhuai/serializer2MoreCases and squashes the following commits: 53a5eaa [Yin Huai] Josh's comments. 487f540 [Yin Huai] Use BufferedOutputStream. 8385f95 [Yin Huai] Always create a new row at the deserialization side to work with sort merge join. c7e2129 [Yin Huai] Update tests. 4513d13 [Yin Huai] Use Serializer2 in more places. 
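One of the squashed commits above ("Always create a new row at the deserialization side to work with sort merge join") deserves a word of explanation: recycling a single mutable row is only safe while the consumer never buffers previously returned rows. The sketch below uses made-up names rather than the actual Spark row classes and only illustrates the aliasing hazard.

```
// Illustration only: why reusing one mutable record breaks consumers that buffer rows,
// for example a sort that has to hold all rows before emitting them.
object RowReuseDemo {
  final class DemoRow(var value: Int)

  // Simulates a deserialization stream that recycles a single row instance.
  def readAllReusing(values: Seq[Int]): Seq[DemoRow] = {
    val reused = new DemoRow(0)
    values.map { v => reused.value = v; reused } // every element aliases `reused`
  }

  // Simulates the fixed behaviour: allocate a fresh row per record.
  def readAllFresh(values: Seq[Int]): Seq[DemoRow] =
    values.map(v => new DemoRow(v))

  def main(args: Array[String]): Unit = {
    val input = Seq(3, 1, 2)
    // All buffered entries alias the same object, so they all show the last value read.
    println(readAllReusing(input).sortBy(_.value).map(_.value)) // List(2, 2, 2)
    // Fresh rows keep their own values, so buffering and sorting them is safe.
    println(readAllFresh(input).sortBy(_.value).map(_.value))   // List(1, 2, 3)
  }
}
```

This is why, in the diff below, the deserialization stream only reuses a mutable row when no key ordering is attached to the shuffle; with a key ordering the downstream sort buffers rows, so a fresh row is allocated per record.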
--- .../apache/spark/sql/execution/Exchange.scala | 23 ++---- .../sql/execution/SparkSqlSerializer2.scala | 74 ++++++++++++------- .../execution/SparkSqlSerializer2Suite.scala | 30 ++++---- 3 files changed, 69 insertions(+), 58 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala index 5b2e46962cd3b..f0d54cd6cd94f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala @@ -84,18 +84,8 @@ case class Exchange( def serializer( keySchema: Array[DataType], valueSchema: Array[DataType], + hasKeyOrdering: Boolean, numPartitions: Int): Serializer = { - // In ExternalSorter's spillToMergeableFile function, key-value pairs are written out - // through write(key) and then write(value) instead of write((key, value)). Because - // SparkSqlSerializer2 assumes that objects passed in are Product2, we cannot safely use - // it when spillToMergeableFile in ExternalSorter will be used. - // So, we will not use SparkSqlSerializer2 when - // - Sort-based shuffle is enabled and the number of reducers (numPartitions) is greater - // then the bypassMergeThreshold; or - // - newOrdering is defined. - val cannotUseSqlSerializer2 = - (sortBasedShuffleOn && numPartitions > bypassMergeThreshold) || newOrdering.nonEmpty - // It is true when there is no field that needs to be write out. // For now, we will not use SparkSqlSerializer2 when noField is true. val noField = @@ -104,14 +94,13 @@ case class Exchange( val useSqlSerializer2 = child.sqlContext.conf.useSqlSerializer2 && // SparkSqlSerializer2 is enabled. - !cannotUseSqlSerializer2 && // Safe to use Serializer2. SparkSqlSerializer2.support(keySchema) && // The schema of key is supported. SparkSqlSerializer2.support(valueSchema) && // The schema of value is supported. 
!noField val serializer = if (useSqlSerializer2) { logInfo("Using SparkSqlSerializer2.") - new SparkSqlSerializer2(keySchema, valueSchema) + new SparkSqlSerializer2(keySchema, valueSchema, hasKeyOrdering) } else { logInfo("Using SparkSqlSerializer.") new SparkSqlSerializer(sparkConf) @@ -154,7 +143,8 @@ case class Exchange( } val keySchema = expressions.map(_.dataType).toArray val valueSchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(keySchema, valueSchema, numPartitions)) + shuffled.setSerializer( + serializer(keySchema, valueSchema, newOrdering.nonEmpty, numPartitions)) shuffled.map(_._2) @@ -179,7 +169,8 @@ case class Exchange( new ShuffledRDD[Row, Null, Null](rdd, part) } val keySchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(keySchema, null, numPartitions)) + shuffled.setSerializer( + serializer(keySchema, null, newOrdering.nonEmpty, numPartitions)) shuffled.map(_._1) @@ -199,7 +190,7 @@ case class Exchange( val partitioner = new HashPartitioner(1) val shuffled = new ShuffledRDD[Null, Row, Row](rdd, partitioner) val valueSchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(null, valueSchema, 1)) + shuffled.setSerializer(serializer(null, valueSchema, false, 1)) shuffled.map(_._2) case _ => sys.error(s"Exchange not implemented for $newPartitioning") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala index 35ad987eb1a63..256d527d7b636 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala @@ -27,7 +27,7 @@ import scala.reflect.ClassTag import org.apache.spark.serializer._ import org.apache.spark.Logging import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.expressions.SpecificMutableRow +import org.apache.spark.sql.catalyst.expressions.{SpecificMutableRow, MutableRow, GenericMutableRow} import org.apache.spark.sql.types._ /** @@ -49,9 +49,9 @@ private[sql] class Serializer2SerializationStream( out: OutputStream) extends SerializationStream with Logging { - val rowOut = new DataOutputStream(out) - val writeKeyFunc = SparkSqlSerializer2.createSerializationFunction(keySchema, rowOut) - val writeValueFunc = SparkSqlSerializer2.createSerializationFunction(valueSchema, rowOut) + private val rowOut = new DataOutputStream(new BufferedOutputStream(out)) + private val writeKeyFunc = SparkSqlSerializer2.createSerializationFunction(keySchema, rowOut) + private val writeValueFunc = SparkSqlSerializer2.createSerializationFunction(valueSchema, rowOut) override def writeObject[T: ClassTag](t: T): SerializationStream = { val kv = t.asInstanceOf[Product2[Row, Row]] @@ -86,31 +86,44 @@ private[sql] class Serializer2SerializationStream( private[sql] class Serializer2DeserializationStream( keySchema: Array[DataType], valueSchema: Array[DataType], + hasKeyOrdering: Boolean, in: InputStream) extends DeserializationStream with Logging { - val rowIn = new DataInputStream(new BufferedInputStream(in)) + private val rowIn = new DataInputStream(new BufferedInputStream(in)) + + private def rowGenerator(schema: Array[DataType]): () => (MutableRow) = { + if (schema == null) { + () => null + } else { + if (hasKeyOrdering) { + // We have key ordering specified in a ShuffledRDD, it is not safe to reuse a mutable row. 
+ () => new GenericMutableRow(schema.length) + } else { + // It is safe to reuse the mutable row. + val mutableRow = new SpecificMutableRow(schema) + () => mutableRow + } + } + } - val key = if (keySchema != null) new SpecificMutableRow(keySchema) else null - val value = if (valueSchema != null) new SpecificMutableRow(valueSchema) else null - val readKeyFunc = SparkSqlSerializer2.createDeserializationFunction(keySchema, rowIn, key) - val readValueFunc = SparkSqlSerializer2.createDeserializationFunction(valueSchema, rowIn, value) + // Functions used to return rows for key and value. + private val getKey = rowGenerator(keySchema) + private val getValue = rowGenerator(valueSchema) + // Functions used to read a serialized row from the InputStream and deserialize it. + private val readKeyFunc = SparkSqlSerializer2.createDeserializationFunction(keySchema, rowIn) + private val readValueFunc = SparkSqlSerializer2.createDeserializationFunction(valueSchema, rowIn) override def readObject[T: ClassTag](): T = { - readKeyFunc() - readValueFunc() - - (key, value).asInstanceOf[T] + (readKeyFunc(getKey()), readValueFunc(getValue())).asInstanceOf[T] } override def readKey[T: ClassTag](): T = { - readKeyFunc() - key.asInstanceOf[T] + readKeyFunc(getKey()).asInstanceOf[T] } override def readValue[T: ClassTag](): T = { - readValueFunc() - value.asInstanceOf[T] + readValueFunc(getValue()).asInstanceOf[T] } override def close(): Unit = { @@ -118,9 +131,10 @@ private[sql] class Serializer2DeserializationStream( } } -private[sql] class ShuffleSerializerInstance( +private[sql] class SparkSqlSerializer2Instance( keySchema: Array[DataType], - valueSchema: Array[DataType]) + valueSchema: Array[DataType], + hasKeyOrdering: Boolean) extends SerializerInstance { def serialize[T: ClassTag](t: T): ByteBuffer = @@ -137,7 +151,7 @@ private[sql] class ShuffleSerializerInstance( } def deserializeStream(s: InputStream): DeserializationStream = { - new Serializer2DeserializationStream(keySchema, valueSchema, s) + new Serializer2DeserializationStream(keySchema, valueSchema, hasKeyOrdering, s) } } @@ -148,12 +162,16 @@ private[sql] class ShuffleSerializerInstance( * The schema of keys is represented by `keySchema` and that of values is represented by * `valueSchema`. */ -private[sql] class SparkSqlSerializer2(keySchema: Array[DataType], valueSchema: Array[DataType]) +private[sql] class SparkSqlSerializer2( + keySchema: Array[DataType], + valueSchema: Array[DataType], + hasKeyOrdering: Boolean) extends Serializer with Logging with Serializable{ - def newInstance(): SerializerInstance = new ShuffleSerializerInstance(keySchema, valueSchema) + def newInstance(): SerializerInstance = + new SparkSqlSerializer2Instance(keySchema, valueSchema, hasKeyOrdering) override def supportsRelocationOfSerializedObjects: Boolean = { // SparkSqlSerializer2 is stateless and writes no stream headers @@ -323,11 +341,11 @@ private[sql] object SparkSqlSerializer2 { */ def createDeserializationFunction( schema: Array[DataType], - in: DataInputStream, - mutableRow: SpecificMutableRow): () => Unit = { - () => { - // If the schema is null, the returned function does nothing when it get called. 
- if (schema != null) { + in: DataInputStream): (MutableRow) => Row = { + if (schema == null) { + (mutableRow: MutableRow) => null + } else { + (mutableRow: MutableRow) => { var i = 0 while (i < schema.length) { schema(i) match { @@ -440,6 +458,8 @@ private[sql] object SparkSqlSerializer2 { } i += 1 } + + mutableRow } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala index 27f063d73a9a9..15337c4045436 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala @@ -148,6 +148,15 @@ abstract class SparkSqlSerializer2Suite extends QueryTest with BeforeAndAfterAll table("shuffle").collect()) } + test("key schema is null") { + val aggregations = allColumns.split(",").map(c => s"COUNT($c)").mkString(",") + val df = sql(s"SELECT $aggregations FROM shuffle") + checkSerializer(df.queryExecution.executedPlan, serializerClass) + checkAnswer( + df, + Row(1000, 1000, 0, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000)) + } + test("value schema is null") { val df = sql(s"SELECT col0 FROM shuffle ORDER BY col0") checkSerializer(df.queryExecution.executedPlan, serializerClass) @@ -167,29 +176,20 @@ class SparkSqlSerializer2SortShuffleSuite extends SparkSqlSerializer2Suite { override def beforeAll(): Unit = { super.beforeAll() // Sort merge will not be triggered. - sql("set spark.sql.shuffle.partitions = 200") - } - - test("key schema is null") { - val aggregations = allColumns.split(",").map(c => s"COUNT($c)").mkString(",") - val df = sql(s"SELECT $aggregations FROM shuffle") - checkSerializer(df.queryExecution.executedPlan, serializerClass) - checkAnswer( - df, - Row(1000, 1000, 0, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000)) + val bypassMergeThreshold = + sparkContext.conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + sql(s"set spark.sql.shuffle.partitions=${bypassMergeThreshold-1}") } } /** For now, we will use SparkSqlSerializer for sort based shuffle with sort merge. */ class SparkSqlSerializer2SortMergeShuffleSuite extends SparkSqlSerializer2Suite { - // We are expecting SparkSqlSerializer. - override val serializerClass: Class[Serializer] = - classOf[SparkSqlSerializer].asInstanceOf[Class[Serializer]] - override def beforeAll(): Unit = { super.beforeAll() // To trigger the sort merge. - sql("set spark.sql.shuffle.partitions = 201") + val bypassMergeThreshold = + sparkContext.conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + sql(s"set spark.sql.shuffle.partitions=${bypassMergeThreshold + 1}") } } From 714db2ef52c0fe34418e252e5a6f220337022046 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Thu, 7 May 2015 22:32:13 -0700 Subject: [PATCH 030/320] [SPARK-7470] [SQL] Spark shell SQLContext crashes without hive This only happens if you have `SPARK_PREPEND_CLASSES` set. Then I built it with `build/sbt clean assembly compile` and just ran it with `bin/spark-shell`. ``` ... 15/05/07 17:07:30 INFO EventLoggingListener: Logging events to file:/tmp/spark-events/local-1431043649919 15/05/07 17:07:30 INFO SparkILoop: Created spark context.. Spark context available as sc. 
java.lang.NoClassDefFoundError: org/apache/hadoop/hive/conf/HiveConf at java.lang.Class.getDeclaredConstructors0(Native Method) at java.lang.Class.privateGetDeclaredConstructors(Class.java:2493) at java.lang.Class.getConstructor0(Class.java:2803) ... Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hive.conf.HiveConf at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:425) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:358) ... 52 more <console>:10: error: not found: value sqlContext import sqlContext.implicits._ ^ <console>:10: error: not found: value sqlContext import sqlContext.sql ^ ``` yhuai marmbrus Author: Andrew Or <andrew@databricks.com> Closes #5997 from andrewor14/sql-shell-crash and squashes the following commits: 61147e6 [Andrew Or] Also expect NoClassDefFoundError --- .../src/main/scala/org/apache/spark/repl/SparkILoop.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala index 8dc0e0c965923..488f3a9f33256 100644 --- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -1028,7 +1028,7 @@ class SparkILoop( logInfo("Created sql context (with Hive support)..") } catch { - case cnf: java.lang.ClassNotFoundException => + case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError => sqlContext = new SQLContext(sparkContext) logInfo("Created sql context..") } From f496bf3c539a873ffdf3aa803847ef7b50135bd7 Mon Sep 17 00:00:00 2001 From: wangfei <wangfei1@huawei.com> Date: Thu, 7 May 2015 22:55:42 -0700 Subject: [PATCH 031/320] [SPARK-7232] [SQL] Add a Substitution batch for spark sql analyzer Added a new batch named `Substitution` before the `Resolution` batch. The motivation is that there are cases where we want to do some substitution on the parsed logical plan before resolving it. Consider these two cases: 1 CTE: for a CTE we first build a raw logical plan ``` 'With Map(q1 -> 'Subquery q1 'Project ['key] 'UnresolvedRelation [src], None) 'Project [*] 'Filter ('key = 5) 'UnresolvedRelation [q1], None ``` In the `With` logical plan there is a map storing (`q1 -> subquery`); we want to first take off the With command and substitute the `q1` of `UnresolvedRelation` with the `subquery`. 2 Another example is window functions: a user may define some windows in the WINDOW clause, and we also need to substitute the window name in the child plan with the concrete window definition. This should also be done in the Substitution batch.
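To make the CTE case concrete, here is a toy, self-contained sketch of the substitution idea. The case classes are stand-ins invented for illustration, not Catalyst's real classes; the actual rule added by this patch is the `CTESubstitution` object in the diff below.

```
// Toy sketch of "substitute CTE definitions before resolution"; illustrative names only.
object CteSubstitutionSketch {
  sealed trait Plan
  case class UnresolvedRelation(name: String) extends Plan
  case class Filter(condition: String, child: Plan) extends Plan
  case class Project(columns: Seq[String], child: Plan) extends Plan
  case class With(child: Plan, cteRelations: Map[String, Plan]) extends Plan

  // Replace every reference to a CTE name with its definition, then drop the With node.
  def substituteCte(plan: Plan): Plan = plan match {
    case With(child, ctes) => substitute(child, ctes)
    case other             => other
  }

  private def substitute(plan: Plan, ctes: Map[String, Plan]): Plan = plan match {
    case UnresolvedRelation(name) => ctes.getOrElse(name, plan)
    case Filter(c, child)         => Filter(c, substitute(child, ctes))
    case Project(cols, child)     => Project(cols, substitute(child, ctes))
    case w: With                  => substituteCte(w) // nested WITH uses its own definitions (a simplification)
  }

  def main(args: Array[String]): Unit = {
    // Roughly the parsed plan shown above:
    //   WITH q1 AS (SELECT key FROM src) SELECT * FROM q1 WHERE key = 5
    val parsed = With(
      Project(Seq("*"), Filter("key = 5", UnresolvedRelation("q1"))),
      Map("q1" -> Project(Seq("key"), UnresolvedRelation("src"))))
    println(substituteCte(parsed))
    // Project(List(*),Filter(key = 5,Project(List(key),UnresolvedRelation(src))))
  }
}
```

The real rule additionally has to respect Hive's behavior of matching on the table name only, which is why `CTESubstitution` in the diff checks `u.tableIdentifier.last` and ignores the database name.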
Author: wangfei <wangfei1@huawei.com> Closes #5776 from scwf/addbatch and squashes the following commits: d4b962f [wangfei] added WindowsSubstitution 70f6932 [wangfei] Merge branch 'master' of https://github.com/apache/spark into addbatch ecaeafb [wangfei] address yhuai's comments 553005a [wangfei] fix test case 0c54798 [wangfei] address comments 29aaaaf [wangfei] fix compile 1c9a092 [wangfei] added Substitution bastch --- .../sql/catalyst/analysis/Analyzer.scala | 98 ++++++++++++------- 1 file changed, 60 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7e46ad851cdd3..bb7913e186a85 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -55,6 +55,10 @@ class Analyzer( val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Nil lazy val batches: Seq[Batch] = Seq( + Batch("Substitution", fixedPoint, + CTESubstitution :: + WindowsSubstitution :: + Nil : _*), Batch("Resolution", fixedPoint, ResolveRelations :: ResolveReferences :: @@ -71,6 +75,55 @@ class Analyzer( extendedResolutionRules : _*) ) + /** + * Substitute child plan with cte definitions + */ + object CTESubstitution extends Rule[LogicalPlan] { + // TODO allow subquery to define CTE + def apply(plan: LogicalPlan): LogicalPlan = plan match { + case With(child, relations) => substituteCTE(child, relations) + case other => other + } + + def substituteCTE(plan: LogicalPlan, cteRelations: Map[String, LogicalPlan]): LogicalPlan = { + plan transform { + // In hive, if there is same table name in database and CTE definition, + // hive will use the table in database, not the CTE one. + // Taking into account the reasonableness and the implementation complexity, + // here use the CTE definition first, check table name only and ignore database name + // see https://github.com/apache/spark/pull/4929#discussion_r27186638 for more info + case u : UnresolvedRelation => + val substituted = cteRelations.get(u.tableIdentifier.last).map { relation => + val withAlias = u.alias.map(Subquery(_, relation)) + withAlias.getOrElse(relation) + } + substituted.getOrElse(u) + } + } + } + + /** + * Substitute child plan with WindowSpecDefinitions. + */ + object WindowsSubstitution extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + // Lookup WindowSpecDefinitions. This rule works with unresolved children. + case WithWindowDefinition(windowDefinitions, child) => + child.transform { + case plan => plan.transformExpressions { + case UnresolvedWindowExpression(c, WindowSpecReference(windowName)) => + val errorMessage = + s"Window specification $windowName is not defined in the WINDOW clause." + val windowSpecDefinition = + windowDefinitions + .get(windowName) + .getOrElse(failAnalysis(errorMessage)) + WindowExpression(c, windowSpecDefinition) + } + } + } + } + /** * Removes no-op Alias expressions from the plan. */ @@ -172,36 +225,20 @@ class Analyzer( * Replaces [[UnresolvedRelation]]s with concrete relations from the catalog. 
*/ object ResolveRelations extends Rule[LogicalPlan] { - def getTable(u: UnresolvedRelation, cteRelations: Map[String, LogicalPlan]): LogicalPlan = { + def getTable(u: UnresolvedRelation): LogicalPlan = { try { - // In hive, if there is same table name in database and CTE definition, - // hive will use the table in database, not the CTE one. - // Taking into account the reasonableness and the implementation complexity, - // here use the CTE definition first, check table name only and ignore database name - cteRelations.get(u.tableIdentifier.last) - .map(relation => u.alias.map(Subquery(_, relation)).getOrElse(relation)) - .getOrElse(catalog.lookupRelation(u.tableIdentifier, u.alias)) + catalog.lookupRelation(u.tableIdentifier, u.alias) } catch { case _: NoSuchTableException => u.failAnalysis(s"no such table ${u.tableName}") } } - def apply(plan: LogicalPlan): LogicalPlan = { - val (realPlan, cteRelations) = plan match { - // TODO allow subquery to define CTE - // Add cte table to a temp relation map,drop `with` plan and keep its child - case With(child, relations) => (child, relations) - case other => (other, Map.empty[String, LogicalPlan]) - } - - realPlan transform { - case i@InsertIntoTable(u: UnresolvedRelation, _, _, _, _) => - i.copy( - table = EliminateSubQueries(getTable(u, cteRelations))) - case u: UnresolvedRelation => - getTable(u, cteRelations) - } + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case i@InsertIntoTable(u: UnresolvedRelation, _, _, _, _) => + i.copy(table = EliminateSubQueries(getTable(u))) + case u: UnresolvedRelation => + getTable(u) } } @@ -664,21 +701,6 @@ class Analyzer( // We have to use transformDown at here to make sure the rule of // "Aggregate with Having clause" will be triggered. def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { - // Lookup WindowSpecDefinitions. This rule works with unresolved children. - case WithWindowDefinition(windowDefinitions, child) => - child.transform { - case plan => plan.transformExpressions { - case UnresolvedWindowExpression(c, WindowSpecReference(windowName)) => - val errorMessage = - s"Window specification $windowName is not defined in the WINDOW clause." - val windowSpecDefinition = - windowDefinitions - .get(windowName) - .getOrElse(failAnalysis(errorMessage)) - WindowExpression(c, windowSpecDefinition) - } - } - // Aggregate with Having clause. This rule works with an unresolved Aggregate because // a resolved Aggregate will not have Window Functions. 
case f @ Filter(condition, a @ Aggregate(groupingExprs, aggregateExprs, child)) From c2f0821aad3b82dcd327e914c9b297e92526649d Mon Sep 17 00:00:00 2001 From: "Zhang, Liye" <liye.zhang@intel.com> Date: Fri, 8 May 2015 09:10:58 +0100 Subject: [PATCH 032/320] [SPARK-7392] [CORE] bugfix: Kryo buffer size cannot be larger than 2M Author: Zhang, Liye <liye.zhang@intel.com> Closes #5934 from liyezhang556520/kryoBufSize and squashes the following commits: 5707e04 [Zhang, Liye] fix import order 8693288 [Zhang, Liye] replace multiplier with ByteUnit methods 9bf93e9 [Zhang, Liye] add tests d91e5ed [Zhang, Liye] change kb to mb --- .../spark/serializer/KryoSerializer.scala | 11 +++---- .../serializer/KryoSerializerSuite.scala | 30 +++++++++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index f9f78852f032b..64ba27f34d2f1 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -32,6 +32,7 @@ import org.apache.spark._ import org.apache.spark.api.python.PythonBroadcast import org.apache.spark.broadcast.HttpBroadcast import org.apache.spark.network.nio.{GetBlock, GotBlock, PutBlock} +import org.apache.spark.network.util.ByteUnit import org.apache.spark.scheduler.{CompressedMapStatus, HighlyCompressedMapStatus} import org.apache.spark.storage._ import org.apache.spark.util.BoundedPriorityQueue @@ -51,18 +52,18 @@ class KryoSerializer(conf: SparkConf) private val bufferSizeKb = conf.getSizeAsKb("spark.kryoserializer.buffer", "64k") - if (bufferSizeKb >= 2048) { + if (bufferSizeKb >= ByteUnit.GiB.toKiB(2)) { throw new IllegalArgumentException("spark.kryoserializer.buffer must be less than " + - s"2048 mb, got: + $bufferSizeKb mb.") + s"2048 mb, got: + ${ByteUnit.KiB.toMiB(bufferSizeKb)} mb.") } - private val bufferSize = (bufferSizeKb * 1024).toInt + private val bufferSize = ByteUnit.KiB.toBytes(bufferSizeKb).toInt val maxBufferSizeMb = conf.getSizeAsMb("spark.kryoserializer.buffer.max", "64m").toInt - if (maxBufferSizeMb >= 2048) { + if (maxBufferSizeMb >= ByteUnit.GiB.toMiB(2)) { throw new IllegalArgumentException("spark.kryoserializer.buffer.max must be less than " + s"2048 mb, got: + $maxBufferSizeMb mb.") } - private val maxBufferSize = maxBufferSizeMb * 1024 * 1024 + private val maxBufferSize = ByteUnit.MiB.toBytes(maxBufferSizeMb).toInt private val referenceTracking = conf.getBoolean("spark.kryo.referenceTracking", true) private val registrationRequired = conf.getBoolean("spark.kryo.registrationRequired", false) diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index 778a7eee73b23..c7369de24b81f 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -32,6 +32,36 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") conf.set("spark.kryo.registrator", classOf[MyRegistrator].getName) + test("configuration limits") { + val conf1 = conf.clone() + val kryoBufferProperty = "spark.kryoserializer.buffer" + val kryoBufferMaxProperty = "spark.kryoserializer.buffer.max" + conf1.set(kryoBufferProperty, "64k") + 
conf1.set(kryoBufferMaxProperty, "64m") + new KryoSerializer(conf1).newInstance() + // 2048m = 2097152k + conf1.set(kryoBufferProperty, "2097151k") + conf1.set(kryoBufferMaxProperty, "64m") + // should not throw exception when kryoBufferMaxProperty < kryoBufferProperty + new KryoSerializer(conf1).newInstance() + conf1.set(kryoBufferMaxProperty, "2097151k") + new KryoSerializer(conf1).newInstance() + val conf2 = conf.clone() + conf2.set(kryoBufferProperty, "2048m") + val thrown1 = intercept[IllegalArgumentException](new KryoSerializer(conf2).newInstance()) + assert(thrown1.getMessage.contains(kryoBufferProperty)) + val conf3 = conf.clone() + conf3.set(kryoBufferMaxProperty, "2048m") + val thrown2 = intercept[IllegalArgumentException](new KryoSerializer(conf3).newInstance()) + assert(thrown2.getMessage.contains(kryoBufferMaxProperty)) + val conf4 = conf.clone() + conf4.set(kryoBufferProperty, "2g") + conf4.set(kryoBufferMaxProperty, "3g") + val thrown3 = intercept[IllegalArgumentException](new KryoSerializer(conf4).newInstance()) + assert(thrown3.getMessage.contains(kryoBufferProperty)) + assert(!thrown3.getMessage.contains(kryoBufferMaxProperty)) + } + test("basic types") { val ser = new KryoSerializer(conf).newInstance() def check[T: ClassTag](t: T) { From ebff7327af5efa9f57c605284de4fae6b050ae0f Mon Sep 17 00:00:00 2001 From: Lianhui Wang <lianhuiwang09@gmail.com> Date: Fri, 8 May 2015 08:44:46 -0500 Subject: [PATCH 033/320] [SPARK-6869] [PYSPARK] Add pyspark archives path to PYTHONPATH Based on https://github.com/apache/spark/pull/5478, which provides a PYSPARK_ARCHIVES_PATH env variable. Within this PR, we just need to export PYSPARK_ARCHIVES_PATH=/user/spark/pyspark.zip,/user/spark/python/lib/py4j-0.8.2.1-src.zip in conf/spark-env.sh when PySpark is not installed on each node of YARN. I ran a Python application successfully on yarn-client and yarn-cluster with this PR. andrewor14 sryza Sephiroth-Lin Can you take a look at this? Thanks.
Author: Lianhui Wang <lianhuiwang09@gmail.com> Closes #5580 from lianhuiwang/SPARK-6869 and squashes the following commits: 66ffa43 [Lianhui Wang] Update Client.scala c2ad0f9 [Lianhui Wang] Update Client.scala 1c8f664 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 008850a [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 f0b4ed8 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 150907b [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 20402cd [Lianhui Wang] use ZipEntry 9d87c3f [Lianhui Wang] update scala style e7bd971 [Lianhui Wang] address vanzin's comments 4b8a3ed [Lianhui Wang] use pyArchivesEnvOpt e6b573b [Lianhui Wang] address vanzin's comments f11f84a [Lianhui Wang] zip pyspark archives 5192cca [Lianhui Wang] update import path 3b1e4c8 [Lianhui Wang] address tgravescs's comments 9396346 [Lianhui Wang] put zip to make-distribution.sh 0d2baf7 [Lianhui Wang] update import paths e0179be [Lianhui Wang] add zip pyspark archives in build or sparksubmit 31e8e06 [Lianhui Wang] update code style 9f31dac [Lianhui Wang] update code and add comments f72987c [Lianhui Wang] add archives path to PYTHONPATH --- assembly/pom.xml | 21 ++++++++++ .../org/apache/spark/deploy/SparkSubmit.scala | 41 +++++++++++++++++++ project/SparkBuild.scala | 37 ++++++++++++++++- .../org/apache/spark/deploy/yarn/Client.scala | 23 ++++++++--- 4 files changed, 114 insertions(+), 8 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 2b4d0a990bf22..626c8577e31fe 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -92,6 +92,27 @@ <skip>true</skip> </configuration> </plugin> + <!-- zip pyspark archives to run python application on yarn mode --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + <configuration> + <target> + <delete dir="${basedir}/../python/lib/pyspark.zip"/> + <zip destfile="${basedir}/../python/lib/pyspark.zip"> + <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/> + </zip> + </target> + </configuration> + </plugin> <!-- Use the shade plugin to create a big JAR with all the dependencies --> <plugin> <groupId>org.apache.maven.plugins</groupId> diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 8a0327984e195..329fa06ba8ba5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -332,6 +332,47 @@ object SparkSubmit { } } + // In yarn mode for a python app, add pyspark archives to files + // that can be distributed with the job + if (args.isPython && clusterManager == YARN) { + var pyArchives: String = null + val pyArchivesEnvOpt = sys.env.get("PYSPARK_ARCHIVES_PATH") + if (pyArchivesEnvOpt.isDefined) { + pyArchives = pyArchivesEnvOpt.get + } else { + if (!sys.env.contains("SPARK_HOME")) { + printErrorAndExit("SPARK_HOME does not exist for python application in yarn mode.") + } + val pythonPath = new ArrayBuffer[String] + for (sparkHome <- sys.env.get("SPARK_HOME")) { + val pyLibPath = Seq(sparkHome, "python", "lib").mkString(File.separator) + val pyArchivesFile = new File(pyLibPath, "pyspark.zip") + if (!pyArchivesFile.exists()) { + printErrorAndExit("pyspark.zip does not exist for 
python application in yarn mode.") + } + val py4jFile = new File(pyLibPath, "py4j-0.8.2.1-src.zip") + if (!py4jFile.exists()) { + printErrorAndExit("py4j-0.8.2.1-src.zip does not exist for python application " + + "in yarn mode.") + } + pythonPath += pyArchivesFile.getAbsolutePath() + pythonPath += py4jFile.getAbsolutePath() + } + pyArchives = pythonPath.mkString(",") + } + + pyArchives = pyArchives.split(",").map { localPath=> + val localURI = Utils.resolveURI(localPath) + if (localURI.getScheme != "local") { + args.files = mergeFileLists(args.files, localURI.toString) + new Path(localPath).getName + } else { + localURI.getPath + } + }.mkString(File.pathSeparator) + sysProps("spark.submit.pyArchives") = pyArchives + } + // If we're running a R app, set the main class to our specific R runner if (args.isR && deployMode == CLIENT) { if (args.primaryResource == SPARKR_SHELL) { diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 026855f8f6a5a..186345af0e60e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -370,6 +370,7 @@ object Assembly { object PySparkAssembly { import sbtassembly.Plugin._ import AssemblyKeys._ + import java.util.zip.{ZipOutputStream, ZipEntry} lazy val settings = Seq( unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" }, @@ -377,16 +378,48 @@ object PySparkAssembly { // to be included in the assembly. We can't just add "python/" to the assembly's resource dir // list since that will copy unneeded / unwanted files. resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File => + val src = new File(BuildCommons.sparkHome, "python/pyspark") + + val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip") + zipFile.delete() + zipRecursive(src, zipFile) + val dst = new File(outDir, "pyspark") if (!dst.isDirectory()) { require(dst.mkdirs()) } - - val src = new File(BuildCommons.sparkHome, "python/pyspark") copy(src, dst) } ) + private def zipRecursive(source: File, destZipFile: File) = { + val destOutput = new ZipOutputStream(new FileOutputStream(destZipFile)) + addFilesToZipStream("", source, destOutput) + destOutput.flush() + destOutput.close() + } + + private def addFilesToZipStream(parent: String, source: File, output: ZipOutputStream): Unit = { + if (source.isDirectory()) { + output.putNextEntry(new ZipEntry(parent + source.getName())) + for (file <- source.listFiles()) { + addFilesToZipStream(parent + source.getName() + File.separator, file, output) + } + } else { + val in = new FileInputStream(source) + output.putNextEntry(new ZipEntry(parent + source.getName())) + val buf = new Array[Byte](8192) + var n = 0 + while (n != -1) { + n = in.read(buf) + if (n != -1) { + output.write(buf, 0, n) + } + } + in.close() + } + } + private def copy(src: File, dst: File): Seq[File] = { src.listFiles().flatMap { f => val child = new File(dst, f.getName()) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 20ecaf092e3f8..d21a7393478ce 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -468,6 +468,17 @@ private[spark] class Client( env("SPARK_YARN_USER_ENV") = userEnvs } + // if spark.submit.pyArchives is in sparkConf, append pyArchives to PYTHONPATH + // that can be passed on to the ApplicationMaster and the executors. 
+ if (sparkConf.contains("spark.submit.pyArchives")) { + var pythonPath = sparkConf.get("spark.submit.pyArchives") + if (env.contains("PYTHONPATH")) { + pythonPath = Seq(env.get("PYTHONPATH"), pythonPath).mkString(File.pathSeparator) + } + env("PYTHONPATH") = pythonPath + sparkConf.setExecutorEnv("PYTHONPATH", pythonPath) + } + // In cluster mode, if the deprecated SPARK_JAVA_OPTS is set, we need to propagate it to // executors. But we can't just set spark.executor.extraJavaOptions, because the driver's // SparkContext will not let that set spark* system properties, which is expected behavior for @@ -1074,7 +1085,7 @@ object Client extends Logging { val hiveConf = hiveClass.getMethod("getConf").invoke(hive) val hiveConfClass = mirror.classLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf") - val hiveConfGet = (param:String) => Option(hiveConfClass + val hiveConfGet = (param: String) => Option(hiveConfClass .getMethod("get", classOf[java.lang.String]) .invoke(hiveConf, param)) @@ -1096,7 +1107,7 @@ object Client extends Logging { val hive2Token = new Token[DelegationTokenIdentifier]() hive2Token.decodeFromUrlString(tokenStr) - credentials.addToken(new Text("hive.server2.delegation.token"),hive2Token) + credentials.addToken(new Text("hive.server2.delegation.token"), hive2Token) logDebug("Added hive.Server2.delegation.token to conf.") hiveClass.getMethod("closeCurrent").invoke(null) } else { @@ -1141,13 +1152,13 @@ object Client extends Logging { logInfo("Added HBase security token to credentials.") } catch { - case e:java.lang.NoSuchMethodException => + case e: java.lang.NoSuchMethodException => logInfo("HBase Method not found: " + e) - case e:java.lang.ClassNotFoundException => + case e: java.lang.ClassNotFoundException => logDebug("HBase Class not found: " + e) - case e:java.lang.NoClassDefFoundError => + case e: java.lang.NoClassDefFoundError => logDebug("HBase Class not found: " + e) - case e:Exception => + case e: Exception => logError("Exception when obtaining HBase security token: " + e) } } From c796be70f36e262b6a2ce75924bd970f40bf4045 Mon Sep 17 00:00:00 2001 From: Imran Rashid <irashid@cloudera.com> Date: Fri, 8 May 2015 16:54:32 +0100 Subject: [PATCH 034/320] [SPARK-3454] separate json endpoints for data in the UI Exposes data available in the UI as json over http. Key points: * new endpoints, handled independently of existing XyzPage classes. Root entrypoint is `JsonRootResource` * Uses jersey + jackson for routing & converting POJOs into json * tests against known results in `HistoryServerSuite` * also fixes some minor issues w/ the UI -- synchronizing on access to `StorageListener` & `StorageStatusListener`, and fixing some inconsistencies w/ the way we handle retained jobs & stages. 
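As a rough illustration of what these endpoints enable, the sketch below polls the application-list resource and prints the application ids. It is only a sketch, not part of the patch: the history server address (localhost:18080) and the /json/v1 base path are assumptions for illustration (see monitoring.md in this change for the actual paths exposed by JsonRootResource), and json4s-jackson, already a core dependency, is used for parsing.

import scala.io.Source

import org.json4s._
import org.json4s.jackson.JsonMethods.parse

object ListCompletedApplications {
  def main(args: Array[String]): Unit = {
    // Assumed history server address and base path; adjust to the deployment
    // and to the endpoint paths documented in monitoring.md.
    val base = "http://localhost:18080/json/v1"
    // Status values are matched case-insensitively by this patch, so
    // "completed" resolves to ApplicationStatus.COMPLETED.
    val body = Source.fromURL(s"$base/applications?status=completed").mkString
    // Collect the "id" field of every application object in the response.
    val ids = for {
      JObject(app) <- parse(body)
      JField("id", JString(id)) <- app
    } yield id
    ids.foreach(println)
  }
}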
Author: Imran Rashid <irashid@cloudera.com> Closes #5940 from squito/SPARK-3454_better_test_files and squashes the following commits: 1a72ed6 [Imran Rashid] rats 85fdb3e [Imran Rashid] Merge branch 'no_php' into SPARK-3454 1fc65b0 [Imran Rashid] Revert "Revert "[SPARK-3454] separate json endpoints for data in the UI"" 1276900 [Imran Rashid] get rid of giant event file, replace w/ smaller one; check both shuffle read & shuffle write 4e12013 [Imran Rashid] just use test case name for expectation file name 863ef64 [Imran Rashid] rename json files to avoid strange file names and not look like php --- .rat-excludes | 7 + core/pom.xml | 8 + .../org/apache/spark/JobExecutionStatus.java | 8 +- .../status/api/v1/ApplicationStatus.java | 30 + .../spark/status/api/v1/StageStatus.java | 31 + .../spark/status/api/v1/TaskSorting.java | 48 + .../java/org/apache/spark/util/EnumUtil.java | 38 + .../scala/org/apache/spark/SparkContext.scala | 2 +- .../history/ApplicationHistoryProvider.scala | 4 +- .../deploy/history/FsHistoryProvider.scala | 14 +- .../spark/deploy/history/HistoryServer.scala | 20 +- .../spark/deploy/master/ApplicationInfo.scala | 2 +- .../apache/spark/deploy/master/Master.scala | 14 +- .../deploy/master/ui/ApplicationPage.scala | 19 +- .../spark/deploy/master/ui/MasterPage.scala | 12 +- .../spark/deploy/master/ui/MasterWebUI.scala | 24 +- .../spark/status/api/v1/AllJobsResource.scala | 98 ++ .../spark/status/api/v1/AllRDDResource.scala | 104 ++ .../status/api/v1/AllStagesResource.scala | 309 ++++ .../api/v1/ApplicationListResource.scala | 94 ++ .../status/api/v1/ExecutorListResource.scala | 36 + .../status/api/v1/JacksonMessageWriter.scala | 93 ++ .../status/api/v1/JsonRootResource.scala | 255 +++ .../api/v1/OneApplicationResource.scala | 31 + .../spark/status/api/v1/OneJobResource.scala | 41 + .../spark/status/api/v1/OneRDDResource.scala | 34 + .../status/api/v1/OneStageResource.scala | 150 ++ .../spark/status/api/v1/SecurityFilter.scala | 38 + .../spark/status/api/v1/SimpleDateParam.scala | 55 + .../org/apache/spark/status/api/v1/api.scala | 228 +++ .../spark/storage/StorageStatusListener.scala | 6 +- .../scala/org/apache/spark/ui/SparkUI.scala | 49 +- .../scala/org/apache/spark/ui/WebUI.scala | 8 +- .../apache/spark/ui/exec/ExecutorsPage.scala | 17 +- .../apache/spark/ui/jobs/AllJobsPage.scala | 14 +- .../apache/spark/ui/jobs/AllStagesPage.scala | 12 +- .../org/apache/spark/ui/jobs/JobPage.scala | 2 +- .../spark/ui/jobs/JobProgressListener.scala | 4 + .../org/apache/spark/ui/jobs/PoolPage.scala | 2 +- .../org/apache/spark/ui/jobs/StagePage.scala | 19 +- .../org/apache/spark/ui/storage/RDDPage.scala | 73 +- .../apache/spark/ui/storage/StoragePage.scala | 2 +- .../apache/spark/ui/storage/StorageTab.scala | 6 +- .../application_list_json_expectation.json | 53 + .../complete_stage_list_json_expectation.json | 67 + .../completed_app_list_json_expectation.json | 53 + .../executor_list_json_expectation.json | 17 + .../failed_stage_list_json_expectation.json | 23 + ...multi_attempt_app_json_1__expectation.json | 15 + ...multi_attempt_app_json_2__expectation.json | 15 + .../job_list_json_expectation.json | 43 + .../maxDate2_app_list_json_expectation.json | 10 + .../maxDate_app_list_json_expectation.json | 19 + .../minDate_app_list_json_expectation.json | 35 + .../one_app_json_expectation.json | 10 + ...ne_app_multi_attempt_json_expectation.json | 17 + .../one_job_json_expectation.json | 15 + .../one_rdd_storage_json_expectation.json | 64 + .../one_stage_attempt_json_expectation.json | 270 ++++ 
.../one_stage_json_expectation.json | 270 ++++ .../rdd_list_storage_json_expectation.json | 9 + .../running_app_list_json_expectation.json | 1 + .../stage_list_json_expectation.json | 89 ++ ...ist_with_accumulable_json_expectation.json | 27 + .../stage_task_list_expectation.json | 561 +++++++ ...multi_attempt_app_json_1__expectation.json | 193 +++ ...multi_attempt_app_json_2__expectation.json | 193 +++ ...k_list_w__offset___length_expectation.json | 1401 +++++++++++++++++ ...stage_task_list_w__sortBy_expectation.json | 561 +++++++ ...tBy_short_names___runtime_expectation.json | 561 +++++++ ...rtBy_short_names__runtime_expectation.json | 561 +++++++ ...mmary_w__custom_quantiles_expectation.json | 19 + ...sk_summary_w_shuffle_read_expectation.json | 19 + ...k_summary_w_shuffle_write_expectation.json | 19 + ...age_with_accumulable_json_expectation.json | 242 +++ ...eded_failed_job_list_json_expectation.json | 43 + .../succeeded_job_list_json_expectation.json | 29 + .../local-1422981759269/APPLICATION_COMPLETE | 0 .../local-1422981759269/EVENT_LOG_1 | 88 ++ .../local-1422981759269/SPARK_VERSION_1.2.0 | 0 .../local-1422981780767/APPLICATION_COMPLETE | 0 .../local-1422981780767/EVENT_LOG_1 | 82 + .../local-1422981780767/SPARK_VERSION_1.2.0 | 0 .../local-1425081759269/APPLICATION_COMPLETE | 0 .../local-1425081759269/EVENT_LOG_1 | 88 ++ .../local-1425081759269/SPARK_VERSION_1.2.0 | 0 .../local-1426533911241/APPLICATION_COMPLETE | 0 .../local-1426533911241/EVENT_LOG_1 | 24 + .../local-1426533911241/SPARK_VERSION_1.2.0 | 0 .../local-1426633911242/APPLICATION_COMPLETE | 0 .../local-1426633911242/EVENT_LOG_1 | 24 + .../local-1426633911242/SPARK_VERSION_1.2.0 | 0 .../spark-events/local-1430917381534 | 231 +++ .../org/apache/spark/JsonTestUtils.scala | 34 + .../spark/deploy/JsonProtocolSuite.scala | 14 +- .../deploy/history/HistoryServerSuite.scala | 231 ++- .../status/api/v1/SimpleDateParamTest.scala | 29 + .../org/apache/spark/ui/UISeleniumSuite.scala | 264 +++- docs/monitoring.md | 74 + pom.xml | 12 + 100 files changed, 8608 insertions(+), 172 deletions(-) create mode 100644 core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java create mode 100644 core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java create mode 100644 core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java create mode 100644 core/src/main/java/org/apache/spark/util/EnumUtil.java create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala 
create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala create mode 100644 core/src/main/scala/org/apache/spark/status/api/v1/api.scala create mode 100644 core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json create mode 100644 
core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json create mode 100755 core/src/test/resources/spark-events/local-1422981759269/APPLICATION_COMPLETE create mode 100755 core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 create mode 100755 core/src/test/resources/spark-events/local-1422981759269/SPARK_VERSION_1.2.0 create mode 100755 core/src/test/resources/spark-events/local-1422981780767/APPLICATION_COMPLETE create mode 100755 core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 create mode 100755 core/src/test/resources/spark-events/local-1422981780767/SPARK_VERSION_1.2.0 create mode 100755 core/src/test/resources/spark-events/local-1425081759269/APPLICATION_COMPLETE create mode 100755 core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1 create mode 100755 core/src/test/resources/spark-events/local-1425081759269/SPARK_VERSION_1.2.0 create mode 100755 core/src/test/resources/spark-events/local-1426533911241/APPLICATION_COMPLETE create mode 100755 core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 create mode 100755 core/src/test/resources/spark-events/local-1426533911241/SPARK_VERSION_1.2.0 create mode 100755 core/src/test/resources/spark-events/local-1426633911242/APPLICATION_COMPLETE create mode 100755 core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 create mode 100755 core/src/test/resources/spark-events/local-1426633911242/SPARK_VERSION_1.2.0 create mode 100755 core/src/test/resources/spark-events/local-1430917381534 create mode 100644 core/src/test/scala/org/apache/spark/JsonTestUtils.scala create mode 100644 core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamTest.scala diff --git a/.rat-excludes b/.rat-excludes index dccf2db8055ce..dc08c4a3673d2 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -74,5 +74,12 @@ logs .*scalastyle-output.xml .*dependency-reduced-pom.xml known_translations +json_expectation +local-1422981759269/* +local-1422981780767/* +local-1425081759269/* +local-1426533911241/* +local-1426633911242/* +local-1430917381534/* DESCRIPTION NAMESPACE diff --git a/core/pom.xml b/core/pom.xml index 164a836cb8f0b..fc42f48973fe9 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -228,6 +228,14 @@ <artifactId>json4s-jackson_${scala.binary.version}</artifactId> <version>3.2.10</version> </dependency> + <dependency> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-server</artifactId> + </dependency> + <dependency> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-core</artifactId> + </dependency> <dependency> <groupId>org.apache.mesos</groupId> <artifactId>mesos</artifactId> diff --git a/core/src/main/java/org/apache/spark/JobExecutionStatus.java b/core/src/main/java/org/apache/spark/JobExecutionStatus.java index 6e161313702bb..0287fb79f8dd2 100644 --- a/core/src/main/java/org/apache/spark/JobExecutionStatus.java +++ b/core/src/main/java/org/apache/spark/JobExecutionStatus.java @@ -17,9 +17,15 @@ package org.apache.spark; +import org.apache.spark.util.EnumUtil; + 
public enum JobExecutionStatus { RUNNING, SUCCEEDED, FAILED, - UNKNOWN + UNKNOWN; + + public static JobExecutionStatus fromString(String str) { + return EnumUtil.parseIgnoreCase(JobExecutionStatus.class, str); + } } diff --git a/core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java b/core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java new file mode 100644 index 0000000000000..8c7dcf776fda8 --- /dev/null +++ b/core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.status.api.v1; + +import org.apache.spark.util.EnumUtil; + +public enum ApplicationStatus { + COMPLETED, + RUNNING; + + public static ApplicationStatus fromString(String str) { + return EnumUtil.parseIgnoreCase(ApplicationStatus.class, str); + } + +} diff --git a/core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java b/core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java new file mode 100644 index 0000000000000..9dbb565aab707 --- /dev/null +++ b/core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.status.api.v1; + +import org.apache.spark.util.EnumUtil; + +public enum StageStatus { + ACTIVE, + COMPLETE, + FAILED, + PENDING; + + public static StageStatus fromString(String str) { + return EnumUtil.parseIgnoreCase(StageStatus.class, str); + } +} diff --git a/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java b/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java new file mode 100644 index 0000000000000..f19ed01d5aebf --- /dev/null +++ b/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.status.api.v1; + +import org.apache.spark.util.EnumUtil; + +import java.util.HashSet; +import java.util.Set; + +public enum TaskSorting { + ID, + INCREASING_RUNTIME("runtime"), + DECREASING_RUNTIME("-runtime"); + + private final Set<String> alternateNames; + private TaskSorting(String... names) { + alternateNames = new HashSet<String>(); + for (String n: names) { + alternateNames.add(n); + } + } + + public static TaskSorting fromString(String str) { + String lower = str.toLowerCase(); + for (TaskSorting t: values()) { + if (t.alternateNames.contains(lower)) { + return t; + } + } + return EnumUtil.parseIgnoreCase(TaskSorting.class, str); + } + +} diff --git a/core/src/main/java/org/apache/spark/util/EnumUtil.java b/core/src/main/java/org/apache/spark/util/EnumUtil.java new file mode 100644 index 0000000000000..c40c7e727613c --- /dev/null +++ b/core/src/main/java/org/apache/spark/util/EnumUtil.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util; + +import com.google.common.base.Joiner; +import org.apache.spark.annotation.Private; + +@Private +public class EnumUtil { + public static <E extends Enum<E>> E parseIgnoreCase(Class<E> clz, String str) { + E[] constants = clz.getEnumConstants(); + if (str == null) { + return null; + } + for (E e : constants) { + if (e.name().equalsIgnoreCase(str)) { + return e; + } + } + throw new IllegalArgumentException( + String.format("Illegal type='%s'. 
Supported type values: %s", + str, Joiner.on(", ").join(constants))); + } +} diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index b5f040ceb15ca..b59f562d05ead 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -430,7 +430,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli _ui = if (conf.getBoolean("spark.ui.enabled", true)) { Some(SparkUI.createLiveUI(this, _conf, listenerBus, _jobProgressListener, - _env.securityManager,appName)) + _env.securityManager,appName, startTime = startTime)) } else { // For tests, do not enable the UI None diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index 6a5011af17458..298a8201960d1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.history import org.apache.spark.ui.SparkUI -private[history] case class ApplicationAttemptInfo( +private[spark] case class ApplicationAttemptInfo( attemptId: Option[String], startTime: Long, endTime: Long, @@ -27,7 +27,7 @@ private[history] case class ApplicationAttemptInfo( sparkUser: String, completed: Boolean = false) -private[history] case class ApplicationHistoryInfo( +private[spark] case class ApplicationHistoryInfo( id: String, name: String, attempts: List[ApplicationAttemptInfo]) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 993763f3aa092..45c2be34c8680 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -17,23 +17,21 @@ package org.apache.spark.deploy.history -import java.io.{IOException, BufferedInputStream, FileNotFoundException, InputStream} +import java.io.{BufferedInputStream, FileNotFoundException, IOException, InputStream} import java.util.concurrent.{ExecutorService, Executors, TimeUnit} import scala.collection.mutable -import scala.concurrent.duration.Duration -import com.google.common.util.concurrent.ThreadFactoryBuilder - -import com.google.common.util.concurrent.MoreExecutors -import org.apache.hadoop.fs.permission.AccessControlException +import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder} import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.fs.permission.AccessControlException + +import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.io.CompressionCodec import org.apache.spark.scheduler._ import org.apache.spark.ui.SparkUI import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} -import org.apache.spark.{Logging, SecurityManager, SparkConf} /** * A class that provides application history from event logs stored in the file system. 
@@ -151,7 +149,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val conf = this.conf.clone() val appSecManager = new SecurityManager(conf) SparkUI.createHistoryUI(conf, replayBus, appSecManager, appId, - HistoryServer.getAttemptURI(appId, attempt.attemptId)) + HistoryServer.getAttemptURI(appId, attempt.attemptId), attempt.startTime) // Do not call ui.bind() to avoid creating a new server for each application } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 754c8e9b6668b..50522e69dc519 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -25,6 +25,7 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.status.api.v1.{ApplicationInfo, ApplicationsListResource, JsonRootResource, UIRoot} import org.apache.spark.ui.{SparkUI, UIUtils, WebUI} import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.{SignalLogger, Utils} @@ -45,7 +46,7 @@ class HistoryServer( provider: ApplicationHistoryProvider, securityManager: SecurityManager, port: Int) - extends WebUI(securityManager, port, conf) with Logging { + extends WebUI(securityManager, port, conf) with Logging with UIRoot { // How many applications to retain private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50) @@ -56,7 +57,7 @@ class HistoryServer( require(parts.length == 1 || parts.length == 2, s"Invalid app key $key") val ui = provider .getAppUI(parts(0), if (parts.length > 1) Some(parts(1)) else None) - .getOrElse(throw new NoSuchElementException()) + .getOrElse(throw new NoSuchElementException(s"no app with key $key")) attachSparkUI(ui) ui } @@ -113,6 +114,10 @@ class HistoryServer( } } + def getSparkUI(appKey: String): Option[SparkUI] = { + Option(appCache.get(appKey)) + } + initialize() /** @@ -123,6 +128,9 @@ class HistoryServer( */ def initialize() { attachPage(new HistoryPage(this)) + + attachHandler(JsonRootResource.getJsonServlet(this)) + attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static")) val contextHandler = new ServletContextHandler @@ -160,7 +168,13 @@ class HistoryServer( * * @return List of all known applications. */ - def getApplicationList(): Iterable[ApplicationHistoryInfo] = provider.getListing() + def getApplicationList(): Iterable[ApplicationHistoryInfo] = { + provider.getListing() + } + + def getApplicationInfoList: Iterator[ApplicationInfo] = { + getApplicationList().iterator.map(ApplicationsListResource.appHistoryInfoToPublicAppInfo) + } /** * Returns the provider configuration to show in the listing page. 
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala index f59d550d4f3b3..1620e95bea218 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala @@ -28,7 +28,7 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.ApplicationDescription import org.apache.spark.util.Utils -private[deploy] class ApplicationInfo( +private[spark] class ApplicationInfo( val startTime: Long, val id: String, val desc: ApplicationDescription, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 0fac3cdcf55e7..53e1903a3d125 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -754,9 +754,9 @@ private[master] class Master( /** * Rebuild a new SparkUI from the given application's event logs. - * Return whether this is successful. + * Return the UI if successful, else None */ - private def rebuildSparkUI(app: ApplicationInfo): Boolean = { + private[master] def rebuildSparkUI(app: ApplicationInfo): Option[SparkUI] = { val appName = app.desc.name val notFoundBasePath = HistoryServer.UI_PATH_PREFIX + "/not-found" try { @@ -764,7 +764,7 @@ private[master] class Master( .getOrElse { // Event logging is not enabled for this application app.desc.appUiUrl = notFoundBasePath - return false + return None } val eventLogFilePrefix = EventLoggingListener.getLogPath( @@ -787,7 +787,7 @@ private[master] class Master( val logInput = EventLoggingListener.openEventLog(new Path(eventLogFile), fs) val replayBus = new ReplayListenerBus() val ui = SparkUI.createHistoryUI(new SparkConf, replayBus, new SecurityManager(conf), - appName + status, HistoryServer.UI_PATH_PREFIX + s"/${app.id}") + appName + status, HistoryServer.UI_PATH_PREFIX + s"/${app.id}", app.startTime) val maybeTruncated = eventLogFile.endsWith(EventLoggingListener.IN_PROGRESS) try { replayBus.replay(logInput, eventLogFile, maybeTruncated) @@ -798,7 +798,7 @@ private[master] class Master( webUi.attachSparkUI(ui) // Application UI is successfully rebuilt, so link the Master UI to it app.desc.appUiUrl = ui.basePath - true + Some(ui) } catch { case fnf: FileNotFoundException => // Event logging is enabled for this application, but no event logs are found @@ -808,7 +808,7 @@ private[master] class Master( msg += " Did you specify the correct logging directory?" 
msg = URLEncoder.encode(msg, "UTF-8") app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&title=$title" - false + None case e: Exception => // Relay exception message to application UI page val title = s"Application history load error (${app.id})" @@ -817,7 +817,7 @@ private[master] class Master( logError(msg, e) msg = URLEncoder.encode(msg, "UTF-8") app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&exception=$exception&title=$title" - false + None } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 273f077bd8f57..06e265f99e231 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -23,10 +23,8 @@ import scala.concurrent.Await import scala.xml.Node import akka.pattern.ask -import org.json4s.JValue -import org.json4s.JsonAST.JNothing -import org.apache.spark.deploy.{ExecutorState, JsonProtocol} +import org.apache.spark.deploy.ExecutorState import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.ExecutorDesc import org.apache.spark.ui.{UIUtils, WebUIPage} @@ -37,21 +35,6 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") private val master = parent.masterActorRef private val timeout = parent.timeout - /** Executor details for a particular application */ - override def renderJson(request: HttpServletRequest): JValue = { - val appId = request.getParameter("appId") - val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, timeout) - val app = state.activeApps.find(_.id == appId).getOrElse({ - state.completedApps.find(_.id == appId).getOrElse(null) - }) - if (app == null) { - JNothing - } else { - JsonProtocol.writeApplicationInfo(app) - } - } - /** Executor details for a particular application */ def render(request: HttpServletRequest): Seq[Node] = { val appId = request.getParameter("appId") diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index 1f2c3fdbfb2bc..756927682cd24 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -35,10 +35,13 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { private val master = parent.masterActorRef private val timeout = parent.timeout - override def renderJson(request: HttpServletRequest): JValue = { + def getMasterState: MasterStateResponse = { val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, timeout) - JsonProtocol.writeMasterState(state) + Await.result(stateFuture, timeout) + } + + override def renderJson(request: HttpServletRequest): JValue = { + JsonProtocol.writeMasterState(getMasterState) } def handleAppKillRequest(request: HttpServletRequest): Unit = { @@ -68,8 +71,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { /** Index view listing applications and executors */ def render(request: HttpServletRequest): Seq[Node] = { - val stateFuture = (master ? 
RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, timeout) + val state = getMasterState val workerHeaders = Seq("Worker Id", "Address", "State", "Cores", "Memory") val workers = state.workers.sortBy(_.id) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index dea0a65eeeaa6..eb26e9f99c70b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.master.ui import org.apache.spark.Logging import org.apache.spark.deploy.master.Master +import org.apache.spark.status.api.v1.{ApplicationsListResource, ApplicationInfo, JsonRootResource, UIRoot} import org.apache.spark.ui.{SparkUI, WebUI} import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.RpcUtils @@ -28,12 +29,15 @@ import org.apache.spark.util.RpcUtils */ private[master] class MasterWebUI(val master: Master, requestedPort: Int) - extends WebUI(master.securityMgr, requestedPort, master.conf, name = "MasterUI") with Logging { + extends WebUI(master.securityMgr, requestedPort, master.conf, name = "MasterUI") with Logging + with UIRoot { val masterActorRef = master.self val timeout = RpcUtils.askTimeout(master.conf) val killEnabled = master.conf.getBoolean("spark.ui.killEnabled", true) + val masterPage = new MasterPage(this) + initialize() /** Initialize all components of the server. */ @@ -43,6 +47,7 @@ class MasterWebUI(val master: Master, requestedPort: Int) attachPage(new HistoryNotFoundPage(this)) attachPage(masterPage) attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static")) + attachHandler(JsonRootResource.getJsonServlet(this)) attachHandler(createRedirectHandler( "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST"))) attachHandler(createRedirectHandler( @@ -60,6 +65,23 @@ class MasterWebUI(val master: Master, requestedPort: Int) assert(serverInfo.isDefined, "Master UI must be bound to a server before detaching SparkUIs") ui.getHandlers.foreach(detachHandler) } + + def getApplicationInfoList: Iterator[ApplicationInfo] = { + val state = masterPage.getMasterState + val activeApps = state.activeApps.sortBy(_.startTime).reverse + val completedApps = state.completedApps.sortBy(_.endTime).reverse + activeApps.iterator.map { ApplicationsListResource.convertApplicationInfo(_, false) } ++ + completedApps.iterator.map { ApplicationsListResource.convertApplicationInfo(_, true) } + } + + def getSparkUI(appId: String): Option[SparkUI] = { + val state = masterPage.getMasterState + val activeApps = state.activeApps.sortBy(_.startTime).reverse + val completedApps = state.completedApps.sortBy(_.endTime).reverse + (activeApps ++ completedApps).find { _.id == appId }.flatMap { + master.rebuildSparkUI + } + } } private[master] object MasterWebUI { diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala new file mode 100644 index 0000000000000..5783df5d8220c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.util.{Arrays, Date, List => JList} +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.apache.spark.JobExecutionStatus +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.JobProgressListener +import org.apache.spark.ui.jobs.UIData.JobUIData + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class AllJobsResource(ui: SparkUI) { + + @GET + def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = { + val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] = + AllJobsResource.getStatusToJobs(ui) + val adjStatuses: JList[JobExecutionStatus] = { + if (statuses.isEmpty) { + Arrays.asList(JobExecutionStatus.values(): _*) + } else { + statuses + } + } + val jobInfos = for { + (status, jobs) <- statusToJobs + job <- jobs if adjStatuses.contains(status) + } yield { + AllJobsResource.convertJobData(job, ui.jobProgressListener, false) + } + jobInfos.sortBy{- _.jobId} + } + +} + +private[v1] object AllJobsResource { + + def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = { + val statusToJobs = ui.jobProgressListener.synchronized { + Seq( + JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq, + JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq, + JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq + ) + } + statusToJobs + } + + def convertJobData( + job: JobUIData, + listener: JobProgressListener, + includeStageDetails: Boolean): JobData = { + listener.synchronized { + val lastStageInfo = listener.stageIdToInfo.get(job.stageIds.max) + val lastStageData = lastStageInfo.flatMap { s => + listener.stageIdToData.get((s.stageId, s.attemptId)) + } + val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)") + val lastStageDescription = lastStageData.flatMap { _.description } + new JobData( + jobId = job.jobId, + name = lastStageName, + description = lastStageDescription, + submissionTime = job.submissionTime.map{new Date(_)}, + completionTime = job.completionTime.map{new Date(_)}, + stageIds = job.stageIds, + jobGroup = job.jobGroup, + status = job.status, + numTasks = job.numTasks, + numActiveTasks = job.numActiveTasks, + numCompletedTasks = job.numCompletedTasks, + numSkippedTasks = job.numCompletedTasks, + numFailedTasks = job.numFailedTasks, + numActiveStages = job.numActiveStages, + numCompletedStages = job.completedStageIndices.size, + numSkippedStages = job.numSkippedStages, + numFailedStages = job.numFailedStages + ) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala new file mode 100644 index 0000000000000..645ede26a0879 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala @@ -0,0 +1,104 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{GET, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.storage.{RDDInfo, StorageStatus, StorageUtils} +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.storage.StorageListener + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class AllRDDResource(ui: SparkUI) { + + @GET + def rddList(): Seq[RDDStorageInfo] = { + val storageStatusList = ui.storageListener.storageStatusList + val rddInfos = ui.storageListener.rddInfoList + rddInfos.map{rddInfo => + AllRDDResource.getRDDStorageInfo(rddInfo.id, rddInfo, storageStatusList, + includeDetails = false) + } + } + +} + +private[spark] object AllRDDResource { + + def getRDDStorageInfo( + rddId: Int, + listener: StorageListener, + includeDetails: Boolean): Option[RDDStorageInfo] = { + val storageStatusList = listener.storageStatusList + listener.rddInfoList.find { _.id == rddId }.map { rddInfo => + getRDDStorageInfo(rddId, rddInfo, storageStatusList, includeDetails) + } + } + + def getRDDStorageInfo( + rddId: Int, + rddInfo: RDDInfo, + storageStatusList: Seq[StorageStatus], + includeDetails: Boolean): RDDStorageInfo = { + val workers = storageStatusList.map { (rddId, _) } + val blockLocations = StorageUtils.getRddBlockLocations(rddId, storageStatusList) + val blocks = storageStatusList + .flatMap { _.rddBlocksById(rddId) } + .sortWith { _._1.name < _._1.name } + .map { case (blockId, status) => + (blockId, status, blockLocations.get(blockId).getOrElse(Seq[String]("Unknown"))) + } + + val dataDistribution = if (includeDetails) { + Some(storageStatusList.map { status => + new RDDDataDistribution( + address = status.blockManagerId.hostPort, + memoryUsed = status.memUsedByRdd(rddId), + memoryRemaining = status.memRemaining, + diskUsed = status.diskUsedByRdd(rddId) + ) } ) + } else { + None + } + val partitions = if (includeDetails) { + Some(blocks.map { case (id, block, locations) => + new RDDPartitionInfo( + blockName = id.name, + storageLevel = block.storageLevel.description, + memoryUsed = block.memSize, + diskUsed = block.diskSize, + executors = locations + ) + } ) + } else { + None + } + + new RDDStorageInfo( + id = rddId, + name = rddInfo.name, + numPartitions = rddInfo.numPartitions, + numCachedPartitions = rddInfo.numCachedPartitions, + storageLevel = rddInfo.storageLevel.description, + memoryUsed = rddInfo.memSize, + diskUsed = rddInfo.diskSize, + dataDistribution = dataDistribution, + partitions = partitions + ) + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala new file mode 100644 index 0000000000000..50608588f09ae --- /dev/null +++ 
b/core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.util.{Arrays, Date, List => JList} +import javax.ws.rs.{GET, PathParam, Produces, QueryParam} +import javax.ws.rs.core.MediaType + +import org.apache.spark.executor.{InputMetrics => InternalInputMetrics, OutputMetrics => InternalOutputMetrics, ShuffleReadMetrics => InternalShuffleReadMetrics, ShuffleWriteMetrics => InternalShuffleWriteMetrics, TaskMetrics => InternalTaskMetrics} +import org.apache.spark.scheduler.{AccumulableInfo => InternalAccumulableInfo, StageInfo} +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.UIData.{StageUIData, TaskUIData} +import org.apache.spark.util.Distribution + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class AllStagesResource(ui: SparkUI) { + + @GET + def stageList(@QueryParam("status") statuses: JList[StageStatus]): Seq[StageData] = { + val listener = ui.jobProgressListener + val stageAndStatus = AllStagesResource.stagesAndStatus(ui) + val adjStatuses = { + if (statuses.isEmpty()) { + Arrays.asList(StageStatus.values(): _*) + } else { + statuses + } + } + for { + (status, stageList) <- stageAndStatus + stageInfo: StageInfo <- stageList if adjStatuses.contains(status) + stageUiData: StageUIData <- listener.synchronized { + listener.stageIdToData.get((stageInfo.stageId, stageInfo.attemptId)) + } + } yield { + AllStagesResource.stageUiToStageData(status, stageInfo, stageUiData, includeDetails = false) + } + } +} + +private[v1] object AllStagesResource { + def stageUiToStageData( + status: StageStatus, + stageInfo: StageInfo, + stageUiData: StageUIData, + includeDetails: Boolean): StageData = { + + val taskData = if (includeDetails) { + Some(stageUiData.taskData.map { case (k, v) => k -> convertTaskData(v) } ) + } else { + None + } + val executorSummary = if (includeDetails) { + Some(stageUiData.executorSummary.map { case (k, summary) => + k -> new ExecutorStageSummary( + taskTime = summary.taskTime, + failedTasks = summary.failedTasks, + succeededTasks = summary.succeededTasks, + inputBytes = summary.inputBytes, + outputBytes = summary.outputBytes, + shuffleRead = summary.shuffleRead, + shuffleWrite = summary.shuffleWrite, + memoryBytesSpilled = summary.memoryBytesSpilled, + diskBytesSpilled = summary.diskBytesSpilled + ) + }) + } else { + None + } + + val accumulableInfo = stageUiData.accumulables.values.map { convertAccumulableInfo }.toSeq + + new StageData( + status = status, + stageId = stageInfo.stageId, + attemptId = stageInfo.attemptId, + numActiveTasks = stageUiData.numActiveTasks, + numCompleteTasks = stageUiData.numCompleteTasks, + numFailedTasks = 
stageUiData.numFailedTasks, + executorRunTime = stageUiData.executorRunTime, + inputBytes = stageUiData.inputBytes, + inputRecords = stageUiData.inputRecords, + outputBytes = stageUiData.outputBytes, + outputRecords = stageUiData.outputRecords, + shuffleReadBytes = stageUiData.shuffleReadTotalBytes, + shuffleReadRecords = stageUiData.shuffleReadRecords, + shuffleWriteBytes = stageUiData.shuffleWriteBytes, + shuffleWriteRecords = stageUiData.shuffleWriteRecords, + memoryBytesSpilled = stageUiData.memoryBytesSpilled, + diskBytesSpilled = stageUiData.diskBytesSpilled, + schedulingPool = stageUiData.schedulingPool, + name = stageInfo.name, + details = stageInfo.details, + accumulatorUpdates = accumulableInfo, + tasks = taskData, + executorSummary = executorSummary + ) + } + + def stagesAndStatus(ui: SparkUI): Seq[(StageStatus, Seq[StageInfo])] = { + val listener = ui.jobProgressListener + listener.synchronized { + Seq( + StageStatus.ACTIVE -> listener.activeStages.values.toSeq, + StageStatus.COMPLETE -> listener.completedStages.reverse.toSeq, + StageStatus.FAILED -> listener.failedStages.reverse.toSeq, + StageStatus.PENDING -> listener.pendingStages.values.toSeq + ) + } + } + + def convertTaskData(uiData: TaskUIData): TaskData = { + new TaskData( + taskId = uiData.taskInfo.taskId, + index = uiData.taskInfo.index, + attempt = uiData.taskInfo.attempt, + launchTime = new Date(uiData.taskInfo.launchTime), + executorId = uiData.taskInfo.executorId, + host = uiData.taskInfo.host, + taskLocality = uiData.taskInfo.taskLocality.toString(), + speculative = uiData.taskInfo.speculative, + accumulatorUpdates = uiData.taskInfo.accumulables.map { convertAccumulableInfo }, + errorMessage = uiData.errorMessage, + taskMetrics = uiData.taskMetrics.map { convertUiTaskMetrics } + ) + } + + def taskMetricDistributions( + allTaskData: Iterable[TaskUIData], + quantiles: Array[Double]): TaskMetricDistributions = { + + val rawMetrics = allTaskData.flatMap{_.taskMetrics}.toSeq + + def metricQuantiles(f: InternalTaskMetrics => Double): IndexedSeq[Double] = + Distribution(rawMetrics.map { d => f(d) }).get.getQuantiles(quantiles) + + // We need to do a lot of similar munging to nested metrics here. For each one, + // we want (a) extract the values for nested metrics (b) make a distribution for each metric + // (c) shove the distribution into the right field in our return type and (d) only return + // a result if the option is defined for any of the tasks. MetricHelper is a little util + // to make it a little easier to deal w/ all of the nested options. Mostly it lets us just + // implement one "build" method, which just builds the quantiles for each field. 
+ + val inputMetrics: Option[InputMetricDistributions] = + new MetricHelper[InternalInputMetrics, InputMetricDistributions](rawMetrics, quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalInputMetrics] = { + raw.inputMetrics + } + + def build: InputMetricDistributions = new InputMetricDistributions( + bytesRead = submetricQuantiles(_.bytesRead), + recordsRead = submetricQuantiles(_.recordsRead) + ) + }.metricOption + + val outputMetrics: Option[OutputMetricDistributions] = + new MetricHelper[InternalOutputMetrics, OutputMetricDistributions](rawMetrics, quantiles) { + def getSubmetrics(raw:InternalTaskMetrics): Option[InternalOutputMetrics] = { + raw.outputMetrics + } + def build: OutputMetricDistributions = new OutputMetricDistributions( + bytesWritten = submetricQuantiles(_.bytesWritten), + recordsWritten = submetricQuantiles(_.recordsWritten) + ) + }.metricOption + + val shuffleReadMetrics: Option[ShuffleReadMetricDistributions] = + new MetricHelper[InternalShuffleReadMetrics, ShuffleReadMetricDistributions](rawMetrics, + quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalShuffleReadMetrics] = { + raw.shuffleReadMetrics + } + def build: ShuffleReadMetricDistributions = new ShuffleReadMetricDistributions( + readBytes = submetricQuantiles(_.totalBytesRead), + readRecords = submetricQuantiles(_.recordsRead), + remoteBytesRead = submetricQuantiles(_.remoteBytesRead), + remoteBlocksFetched = submetricQuantiles(_.remoteBlocksFetched), + localBlocksFetched = submetricQuantiles(_.localBlocksFetched), + totalBlocksFetched = submetricQuantiles(_.totalBlocksFetched), + fetchWaitTime = submetricQuantiles(_.fetchWaitTime) + ) + }.metricOption + + val shuffleWriteMetrics: Option[ShuffleWriteMetricDistributions] = + new MetricHelper[InternalShuffleWriteMetrics, ShuffleWriteMetricDistributions](rawMetrics, + quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalShuffleWriteMetrics] = { + raw.shuffleWriteMetrics + } + def build: ShuffleWriteMetricDistributions = new ShuffleWriteMetricDistributions( + writeBytes = submetricQuantiles(_.shuffleBytesWritten), + writeRecords = submetricQuantiles(_.shuffleRecordsWritten), + writeTime = submetricQuantiles(_.shuffleWriteTime) + ) + }.metricOption + + new TaskMetricDistributions( + quantiles = quantiles, + executorDeserializeTime = metricQuantiles(_.executorDeserializeTime), + executorRunTime = metricQuantiles(_.executorRunTime), + resultSize = metricQuantiles(_.resultSize), + jvmGcTime = metricQuantiles(_.jvmGCTime), + resultSerializationTime = metricQuantiles(_.resultSerializationTime), + memoryBytesSpilled = metricQuantiles(_.memoryBytesSpilled), + diskBytesSpilled = metricQuantiles(_.diskBytesSpilled), + inputMetrics = inputMetrics, + outputMetrics = outputMetrics, + shuffleReadMetrics = shuffleReadMetrics, + shuffleWriteMetrics = shuffleWriteMetrics + ) + } + + def convertAccumulableInfo(acc: InternalAccumulableInfo): AccumulableInfo = { + new AccumulableInfo(acc.id, acc.name, acc.update, acc.value) + } + + def convertUiTaskMetrics(internal: InternalTaskMetrics): TaskMetrics = { + new TaskMetrics( + executorDeserializeTime = internal.executorDeserializeTime, + executorRunTime = internal.executorRunTime, + resultSize = internal.resultSize, + jvmGcTime = internal.jvmGCTime, + resultSerializationTime = internal.resultSerializationTime, + memoryBytesSpilled = internal.memoryBytesSpilled, + diskBytesSpilled = internal.diskBytesSpilled, + inputMetrics = internal.inputMetrics.map { 
convertInputMetrics }, + outputMetrics = Option(internal.outputMetrics).flatten.map { convertOutputMetrics }, + shuffleReadMetrics = internal.shuffleReadMetrics.map { convertShuffleReadMetrics }, + shuffleWriteMetrics = internal.shuffleWriteMetrics.map { convertShuffleWriteMetrics } + ) + } + + def convertInputMetrics(internal: InternalInputMetrics): InputMetrics = { + new InputMetrics( + bytesRead = internal.bytesRead, + recordsRead = internal.recordsRead + ) + } + + def convertOutputMetrics(internal: InternalOutputMetrics): OutputMetrics = { + new OutputMetrics( + bytesWritten = internal.bytesWritten, + recordsWritten = internal.recordsWritten + ) + } + + def convertShuffleReadMetrics(internal: InternalShuffleReadMetrics): ShuffleReadMetrics = { + new ShuffleReadMetrics( + remoteBlocksFetched = internal.remoteBlocksFetched, + localBlocksFetched = internal.localBlocksFetched, + fetchWaitTime = internal.fetchWaitTime, + remoteBytesRead = internal.remoteBytesRead, + totalBlocksFetched = internal.totalBlocksFetched, + recordsRead = internal.recordsRead + ) + } + + def convertShuffleWriteMetrics(internal: InternalShuffleWriteMetrics): ShuffleWriteMetrics = { + new ShuffleWriteMetrics( + bytesWritten = internal.shuffleBytesWritten, + writeTime = internal.shuffleWriteTime, + recordsWritten = internal.shuffleRecordsWritten + ) + } +} + +/** + * Helper for getting distributions from nested metric types. Many of the metrics we want are + * contained in options inside TaskMetrics (eg., ShuffleWriteMetrics). This makes it easy to handle + * the options (returning None if the metrics are all empty), and extract the quantiles for each + * metric. After creating an instance, call metricOption to get the result type. + */ +private[v1] abstract class MetricHelper[I,O]( + rawMetrics: Seq[InternalTaskMetrics], + quantiles: Array[Double]) { + + def getSubmetrics(raw: InternalTaskMetrics): Option[I] + + def build: O + + val data: Seq[I] = rawMetrics.flatMap(getSubmetrics) + + /** applies the given function to all input metrics, and returns the quantiles */ + def submetricQuantiles(f: I => Double): IndexedSeq[Double] = { + Distribution(data.map { d => f(d) }).get.getQuantiles(quantiles) + } + + def metricOption: Option[O] = { + if (data.isEmpty) { + None + } else { + Some(build) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala new file mode 100644 index 0000000000000..17b521f3e1d41 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import java.util.{Arrays, Date, List => JList} +import javax.ws.rs.{DefaultValue, GET, Produces, QueryParam} +import javax.ws.rs.core.MediaType + +import org.apache.spark.deploy.history.ApplicationHistoryInfo +import org.apache.spark.deploy.master.{ApplicationInfo => InternalApplicationInfo} + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class ApplicationListResource(uiRoot: UIRoot) { + + @GET + def appList( + @QueryParam("status") status: JList[ApplicationStatus], + @DefaultValue("2010-01-01") @QueryParam("minDate") minDate: SimpleDateParam, + @DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam) + : Iterator[ApplicationInfo] = { + val allApps = uiRoot.getApplicationInfoList + val adjStatus = { + if (status.isEmpty) { + Arrays.asList(ApplicationStatus.values(): _*) + } else { + status + } + } + val includeCompleted = adjStatus.contains(ApplicationStatus.COMPLETED) + val includeRunning = adjStatus.contains(ApplicationStatus.RUNNING) + allApps.filter { app => + val anyRunning = app.attempts.exists(!_.completed) + // if any attempt is still running, we consider the app to also still be running + val statusOk = (!anyRunning && includeCompleted) || + (anyRunning && includeRunning) + // keep the app if *any* attempts fall in the right time window + val dateOk = app.attempts.exists { attempt => + attempt.startTime.getTime >= minDate.timestamp && + attempt.startTime.getTime <= maxDate.timestamp + } + statusOk && dateOk + } + } +} + +private[spark] object ApplicationsListResource { + def appHistoryInfoToPublicAppInfo(app: ApplicationHistoryInfo): ApplicationInfo = { + new ApplicationInfo( + id = app.id, + name = app.name, + attempts = app.attempts.map { internalAttemptInfo => + new ApplicationAttemptInfo( + attemptId = internalAttemptInfo.attemptId, + startTime = new Date(internalAttemptInfo.startTime), + endTime = new Date(internalAttemptInfo.endTime), + sparkUser = internalAttemptInfo.sparkUser, + completed = internalAttemptInfo.completed + ) + } + ) + } + + def convertApplicationInfo( + internal: InternalApplicationInfo, + completed: Boolean): ApplicationInfo = { + // standalone application info always has just one attempt + new ApplicationInfo( + id = internal.id, + name = internal.desc.name, + attempts = Seq(new ApplicationAttemptInfo( + attemptId = None, + startTime = new Date(internal.startTime), + endTime = new Date(internal.endTime), + sparkUser = internal.desc.user, + completed = completed + )) + ) + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala new file mode 100644 index 0000000000000..8ad4656b4dada --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala @@ -0,0 +1,36 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{GET, PathParam, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.exec.ExecutorsPage + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class ExecutorListResource(ui: SparkUI) { + + @GET + def executorList(): Seq[ExecutorSummary] = { + val listener = ui.executorsListener + val storageStatusList = listener.storageStatusList + (0 until storageStatusList.size).map { statusId => + ExecutorsPage.getExecInfo(listener, statusId) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala new file mode 100644 index 0000000000000..202a5191ad57d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.io.OutputStream +import java.lang.annotation.Annotation +import java.lang.reflect.Type +import java.text.SimpleDateFormat +import java.util.{Calendar, SimpleTimeZone} +import javax.ws.rs.Produces +import javax.ws.rs.core.{MediaType, MultivaluedMap} +import javax.ws.rs.ext.{MessageBodyWriter, Provider} + +import com.fasterxml.jackson.annotation.JsonInclude +import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} + +/** + * This class converts the POJO metric responses into json, using jackson. + * + * This doesn't follow the standard jersey-jackson plugin options, because we want to stick + * with an old version of jersey (since we have it from yarn anyway) and don't want to pull in lots + * of dependencies from a new plugin. + * + * Note that jersey automatically discovers this class based on its package and its annotations. 
+ */ +@Provider +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ + + val mapper = new ObjectMapper() { + override def writeValueAsString(t: Any): String = { + super.writeValueAsString(t) + } + } + mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) + mapper.enable(SerializationFeature.INDENT_OUTPUT) + mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) + mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) + + override def isWriteable( + aClass: Class[_], + `type`: Type, + annotations: Array[Annotation], + mediaType: MediaType): Boolean = { + true + } + + override def writeTo( + t: Object, + aClass: Class[_], + `type`: Type, + annotations: Array[Annotation], + mediaType: MediaType, + multivaluedMap: MultivaluedMap[String, AnyRef], + outputStream: OutputStream): Unit = { + t match { + case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8")) + case _ => mapper.writeValue(outputStream, t) + } + } + + override def getSize( + t: Object, + aClass: Class[_], + `type`: Type, + annotations: Array[Annotation], + mediaType: MediaType): Long = { + -1L + } +} + +private[spark] object JacksonMessageWriter { + def makeISODateFormat: SimpleDateFormat = { + val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") + val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) + iso8601.setCalendar(cal) + iso8601 + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala new file mode 100644 index 0000000000000..c3ec45f54681b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.servlet.ServletContext +import javax.ws.rs._ +import javax.ws.rs.core.{Context, Response} + +import com.sun.jersey.api.core.ResourceConfig +import com.sun.jersey.spi.container.servlet.ServletContainer +import org.eclipse.jetty.server.handler.ContextHandler +import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} + +import org.apache.spark.SecurityManager +import org.apache.spark.ui.SparkUI + +/** + * Main entry point for serving spark application metrics as json, using JAX-RS. + * + * Each resource should have endpoints that return **public** classes defined in api.scala. Mima + * binary compatibility checks ensure that we don't inadvertently make changes that break the api. + * The returned objects are automatically converted to json by jackson with JacksonMessageWriter. 
+ * In addition, there are a number of tests in HistoryServerSuite that compare the json to "golden + * files". Any changes and additions should be reflected there as well -- see the notes in + * HistoryServerSuite. + */ +@Path("/v1") +private[v1] class JsonRootResource extends UIRootFromServletContext { + + @Path("applications") + def getApplicationList(): ApplicationListResource = { + new ApplicationListResource(uiRoot) + } + + @Path("applications/{appId}") + def getApplication(): OneApplicationResource = { + new OneApplicationResource(uiRoot) + } + + @Path("applications/{appId}/{attemptId}/jobs") + def getJobs( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): AllJobsResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new AllJobsResource(ui) + } + } + + @Path("applications/{appId}/jobs") + def getJobs(@PathParam("appId") appId: String): AllJobsResource = { + uiRoot.withSparkUI(appId, None) { ui => + new AllJobsResource(ui) + } + } + + @Path("applications/{appId}/jobs/{jobId: \\d+}") + def getJob(@PathParam("appId") appId: String): OneJobResource = { + uiRoot.withSparkUI(appId, None) { ui => + new OneJobResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/jobs/{jobId: \\d+}") + def getJob( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): OneJobResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new OneJobResource(ui) + } + } + + @Path("applications/{appId}/executors") + def getExecutors(@PathParam("appId") appId: String): ExecutorListResource = { + uiRoot.withSparkUI(appId, None) { ui => + new ExecutorListResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/executors") + def getExecutors( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): ExecutorListResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new ExecutorListResource(ui) + } + } + + + @Path("applications/{appId}/stages") + def getStages(@PathParam("appId") appId: String): AllStagesResource= { + uiRoot.withSparkUI(appId, None) { ui => + new AllStagesResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/stages") + def getStages( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): AllStagesResource= { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new AllStagesResource(ui) + } + } + + @Path("applications/{appId}/stages/{stageId: \\d+}") + def getStage(@PathParam("appId") appId: String): OneStageResource= { + uiRoot.withSparkUI(appId, None) { ui => + new OneStageResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/stages/{stageId: \\d+}") + def getStage( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): OneStageResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new OneStageResource(ui) + } + } + + @Path("applications/{appId}/storage/rdd") + def getRdds(@PathParam("appId") appId: String): AllRDDResource = { + uiRoot.withSparkUI(appId, None) { ui => + new AllRDDResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/storage/rdd") + def getRdds( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): AllRDDResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new AllRDDResource(ui) + } + } + + @Path("applications/{appId}/storage/rdd/{rddId: \\d+}") + def getRdd(@PathParam("appId") appId: String): OneRDDResource = { + uiRoot.withSparkUI(appId, None) { ui => + new OneRDDResource(ui) + } + } + + 
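The locators above are deliberately declared in pairs: one path that names an explicit {attemptId} and one that does not, and both variants resolve the SparkUI through uiRoot.withSparkUI, so an unknown application id surfaces as a 404 rather than a server error. The sketch below shows what one further endpoint written in the same style could look like; EnvironmentResource and EnvironmentRoutesSketch are hypothetical names used only for illustration and are not part of this patch (in the real code the two methods would simply be added to JsonRootResource):

```scala
package org.apache.spark.status.api.v1

import javax.ws.rs.{Path, PathParam}

import org.apache.spark.ui.SparkUI

// Hypothetical resource, declared only so the sketch compiles; not part of the patch.
private[v1] class EnvironmentResource(ui: SparkUI)

// Sketch of the pairing convention used by JsonRootResource: every endpoint is
// declared once without an attemptId and once with one, and both delegate to
// uiRoot.withSparkUI so a missing app becomes a NotFoundException (HTTP 404).
private[v1] class EnvironmentRoutesSketch extends UIRootFromServletContext {

  @Path("applications/{appId}/environment")
  def getEnvironment(@PathParam("appId") appId: String): EnvironmentResource = {
    uiRoot.withSparkUI(appId, None) { ui =>
      new EnvironmentResource(ui)
    }
  }

  @Path("applications/{appId}/{attemptId}/environment")
  def getEnvironment(
      @PathParam("appId") appId: String,
      @PathParam("attemptId") attemptId: String): EnvironmentResource = {
    uiRoot.withSparkUI(appId, Some(attemptId)) { ui =>
      new EnvironmentResource(ui)
    }
  }
}
```
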
@Path("applications/{appId}/{attemptId}/storage/rdd/{rddId: \\d+}") + def getRdd( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): OneRDDResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new OneRDDResource(ui) + } + } + +} + +private[spark] object JsonRootResource { + + def getJsonServlet(uiRoot: UIRoot): ServletContextHandler = { + val jerseyContext = new ServletContextHandler(ServletContextHandler.NO_SESSIONS) + jerseyContext.setContextPath("/json") + val holder:ServletHolder = new ServletHolder(classOf[ServletContainer]) + holder.setInitParameter("com.sun.jersey.config.property.resourceConfigClass", + "com.sun.jersey.api.core.PackagesResourceConfig") + holder.setInitParameter("com.sun.jersey.config.property.packages", + "org.apache.spark.status.api.v1") + holder.setInitParameter(ResourceConfig.PROPERTY_CONTAINER_REQUEST_FILTERS, + classOf[SecurityFilter].getCanonicalName) + UIRootFromServletContext.setUiRoot(jerseyContext, uiRoot) + jerseyContext.addServlet(holder, "/*") + jerseyContext + } +} + +/** + * This trait is shared by the all the root containers for application UI information -- + * the HistoryServer, the Master UI, and the application UI. This provides the common + * interface needed for them all to expose application info as json. + */ +private[spark] trait UIRoot { + def getSparkUI(appKey: String): Option[SparkUI] + def getApplicationInfoList: Iterator[ApplicationInfo] + + /** + * Get the spark UI with the given appID, and apply a function + * to it. If there is no such app, throw an appropriate exception + */ + def withSparkUI[T](appId: String, attemptId: Option[String])(f: SparkUI => T): T = { + val appKey = attemptId.map(appId + "/" + _).getOrElse(appId) + getSparkUI(appKey) match { + case Some(ui) => + f(ui) + case None => throw new NotFoundException("no such app: " + appId) + } + } + def securityManager: SecurityManager +} + +private[v1] object UIRootFromServletContext { + + private val attribute = getClass.getCanonicalName + + def setUiRoot(contextHandler: ContextHandler, uiRoot: UIRoot): Unit = { + contextHandler.setAttribute(attribute, uiRoot) + } + + def getUiRoot(context: ServletContext): UIRoot = { + context.getAttribute(attribute).asInstanceOf[UIRoot] + } +} + +private[v1] trait UIRootFromServletContext { + @Context + var servletContext: ServletContext = _ + + def uiRoot: UIRoot = UIRootFromServletContext.getUiRoot(servletContext) +} + +private[v1] class NotFoundException(msg: String) extends WebApplicationException( + new NoSuchElementException(msg), + Response + .status(Response.Status.NOT_FOUND) + .entity(ErrorWrapper(msg)) + .build() +) + +private[v1] class BadParameterException(msg: String) extends WebApplicationException( + new IllegalArgumentException(msg), + Response + .status(Response.Status.BAD_REQUEST) + .entity(ErrorWrapper(msg)) + .build() +) { + def this(param: String, exp: String, actual: String) = { + this(raw"""Bad value for parameter "$param". Expected a $exp, got "$actual"""") + } +} + +/** + * Signal to JacksonMessageWriter to not convert the message into json (which would result in an + * extra set of quotes). 
+ */ +private[v1] case class ErrorWrapper(s: String) diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala new file mode 100644 index 0000000000000..b5ef72649e295 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.core.MediaType +import javax.ws.rs.{Produces, PathParam, GET} + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneApplicationResource(uiRoot: UIRoot) { + + @GET + def getApp(@PathParam("appId") appId: String): ApplicationInfo = { + val apps = uiRoot.getApplicationInfoList.find { _.id == appId } + apps.getOrElse(throw new NotFoundException("unknown app: " + appId)) + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala new file mode 100644 index 0000000000000..6d8a60d480aed --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{PathParam, GET, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.JobExecutionStatus +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.UIData.JobUIData + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneJobResource(ui: SparkUI) { + + @GET + def oneJob(@PathParam("jobId") jobId: Int): JobData = { + val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] = + AllJobsResource.getStatusToJobs(ui) + val jobOpt = statusToJobs.map {_._2} .flatten.find { jobInfo => jobInfo.jobId == jobId} + jobOpt.map { job => + AllJobsResource.convertJobData(job, ui.jobProgressListener, false) + }.getOrElse { + throw new NotFoundException("unknown job: " + jobId) + } + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala new file mode 100644 index 0000000000000..07b224fac4786 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{PathParam, GET, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.ui.SparkUI + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneRDDResource(ui: SparkUI) { + + @GET + def rddData(@PathParam("rddId") rddId: Int): RDDStorageInfo = { + AllRDDResource.getRDDStorageInfo(rddId, ui.storageListener, true).getOrElse( + throw new NotFoundException(s"no rdd found w/ id $rddId") + ) + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala new file mode 100644 index 0000000000000..fd24aea63a8a1 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.apache.spark.SparkException +import org.apache.spark.scheduler.StageInfo +import org.apache.spark.status.api.v1.StageStatus._ +import org.apache.spark.status.api.v1.TaskSorting._ +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.JobProgressListener +import org.apache.spark.ui.jobs.UIData.StageUIData + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneStageResource(ui: SparkUI) { + + @GET + @Path("") + def stageData(@PathParam("stageId") stageId: Int): Seq[StageData] = { + withStage(stageId){ stageAttempts => + stageAttempts.map { stage => + AllStagesResource.stageUiToStageData(stage.status, stage.info, stage.ui, + includeDetails = true) + } + } + } + + @GET + @Path("/{stageAttemptId: \\d+}") + def oneAttemptData( + @PathParam("stageId") stageId: Int, + @PathParam("stageAttemptId") stageAttemptId: Int): StageData = { + withStageAttempt(stageId, stageAttemptId) { stage => + AllStagesResource.stageUiToStageData(stage.status, stage.info, stage.ui, + includeDetails = true) + } + } + + @GET + @Path("/{stageAttemptId: \\d+}/taskSummary") + def taskSummary( + @PathParam("stageId") stageId: Int, + @PathParam("stageAttemptId") stageAttemptId: Int, + @DefaultValue("0.05,0.25,0.5,0.75,0.95") @QueryParam("quantiles") quantileString: String) + : TaskMetricDistributions = { + withStageAttempt(stageId, stageAttemptId) { stage => + val quantiles = quantileString.split(",").map { s => + try { + s.toDouble + } catch { + case nfe: NumberFormatException => + throw new BadParameterException("quantiles", "double", s) + } + } + AllStagesResource.taskMetricDistributions(stage.ui.taskData.values, quantiles) + } + } + + @GET + @Path("/{stageAttemptId: \\d+}/taskList") + def taskList( + @PathParam("stageId") stageId: Int, + @PathParam("stageAttemptId") stageAttemptId: Int, + @DefaultValue("0") @QueryParam("offset") offset: Int, + @DefaultValue("20") @QueryParam("length") length: Int, + @DefaultValue("ID") @QueryParam("sortBy") sortBy: TaskSorting): Seq[TaskData] = { + withStageAttempt(stageId, stageAttemptId) { stage => + val tasks = stage.ui.taskData.values.map{AllStagesResource.convertTaskData}.toIndexedSeq + .sorted(OneStageResource.ordering(sortBy)) + tasks.slice(offset, offset + length) + } + } + + private case class StageStatusInfoUi(status: StageStatus, info: StageInfo, ui: StageUIData) + + private def withStage[T](stageId: Int)(f: Seq[StageStatusInfoUi] => T): T = { + val stageAttempts = findStageStatusUIData(ui.jobProgressListener, stageId) + if (stageAttempts.isEmpty) { + throw new NotFoundException("unknown stage: " + stageId) + } else { + f(stageAttempts) + } + } + + private def findStageStatusUIData( + listener: JobProgressListener, + stageId: Int): Seq[StageStatusInfoUi] = { + listener.synchronized { + def getStatusInfoUi(status: StageStatus, infos: Seq[StageInfo]): Seq[StageStatusInfoUi] = { + infos.filter { _.stageId == stageId }.map { info => + val ui = listener.stageIdToData.getOrElse((info.stageId, info.attemptId), + // this is an internal error -- we should always have uiData + throw new SparkException( + s"no stage ui data found for stage: ${info.stageId}:${info.attemptId}") + ) + StageStatusInfoUi(status, info, ui) + } + } + getStatusInfoUi(ACTIVE, listener.activeStages.values.toSeq) ++ + getStatusInfoUi(COMPLETE, listener.completedStages) ++ + getStatusInfoUi(FAILED, listener.failedStages) ++ + getStatusInfoUi(PENDING, 
listener.pendingStages.values.toSeq) + } + } + + private def withStageAttempt[T]( + stageId: Int, + stageAttemptId: Int) + (f: StageStatusInfoUi => T): T = { + withStage(stageId) { attempts => + val oneAttempt = attempts.find { stage => stage.info.attemptId == stageAttemptId } + oneAttempt match { + case Some(stage) => + f(stage) + case None => + val stageAttempts = attempts.map { _.info.attemptId } + throw new NotFoundException(s"unknown attempt for stage $stageId. " + + s"Found attempts: ${stageAttempts.mkString("[", ",", "]")}") + } + } + } +} + +object OneStageResource { + def ordering(taskSorting: TaskSorting): Ordering[TaskData] = { + val extractor: (TaskData => Long) = td => + taskSorting match { + case ID => td.taskId + case INCREASING_RUNTIME => td.taskMetrics.map{_.executorRunTime}.getOrElse(-1L) + case DECREASING_RUNTIME => -td.taskMetrics.map{_.executorRunTime}.getOrElse(-1L) + } + Ordering.by(extractor) + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala b/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala new file mode 100644 index 0000000000000..95fbd96ade5ab --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.WebApplicationException +import javax.ws.rs.core.Response + +import com.sun.jersey.spi.container.{ContainerRequest, ContainerRequestFilter} + +private[v1] class SecurityFilter extends ContainerRequestFilter with UIRootFromServletContext { + def filter(req: ContainerRequest): ContainerRequest = { + val user = Option(req.getUserPrincipal).map { _.getName }.orNull + if (uiRoot.securityManager.checkUIViewPermissions(user)) { + req + } else { + throw new WebApplicationException( + Response + .status(Response.Status.FORBIDDEN) + .entity(raw"""user "$user"is not authorized""") + .build() + ) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala b/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala new file mode 100644 index 0000000000000..cee29786c3019 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.text.SimpleDateFormat +import java.util.TimeZone +import javax.ws.rs.WebApplicationException +import javax.ws.rs.core.Response +import javax.ws.rs.core.Response.Status + +import scala.util.Try + +private[v1] class SimpleDateParam(val originalValue: String) { + val timestamp: Long = { + SimpleDateParam.formats.collectFirst { + case fmt if Try(fmt.parse(originalValue)).isSuccess => + fmt.parse(originalValue).getTime() + }.getOrElse( + throw new WebApplicationException( + Response + .status(Status.BAD_REQUEST) + .entity("Couldn't parse date: " + originalValue) + .build() + ) + ) + } +} + +private[v1] object SimpleDateParam { + + val formats: Seq[SimpleDateFormat] = { + + val gmtDay = new SimpleDateFormat("yyyy-MM-dd") + gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) + + Seq( + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz"), + gmtDay + ) + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala new file mode 100644 index 0000000000000..ef3c8570d8186 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import java.util.Date + +import scala.collection.Map + +import org.apache.spark.JobExecutionStatus + +class ApplicationInfo private[spark]( + val id: String, + val name: String, + val attempts: Seq[ApplicationAttemptInfo]) + +class ApplicationAttemptInfo private[spark]( + val attemptId: Option[String], + val startTime: Date, + val endTime: Date, + val sparkUser: String, + val completed: Boolean = false) + +class ExecutorStageSummary private[spark]( + val taskTime : Long, + val failedTasks : Int, + val succeededTasks : Int, + val inputBytes : Long, + val outputBytes : Long, + val shuffleRead : Long, + val shuffleWrite : Long, + val memoryBytesSpilled : Long, + val diskBytesSpilled : Long) + +class ExecutorSummary private[spark]( + val id: String, + val hostPort: String, + val rddBlocks: Int, + val memoryUsed: Long, + val diskUsed: Long, + val activeTasks: Int, + val failedTasks: Int, + val completedTasks: Int, + val totalTasks: Int, + val totalDuration: Long, + val totalInputBytes: Long, + val totalShuffleRead: Long, + val totalShuffleWrite: Long, + val maxMemory: Long, + val executorLogs: Map[String, String]) + +class JobData private[spark]( + val jobId: Int, + val name: String, + val description: Option[String], + val submissionTime: Option[Date], + val completionTime: Option[Date], + val stageIds: Seq[Int], + val jobGroup: Option[String], + val status: JobExecutionStatus, + val numTasks: Int, + val numActiveTasks: Int, + val numCompletedTasks: Int, + val numSkippedTasks: Int, + val numFailedTasks: Int, + val numActiveStages: Int, + val numCompletedStages: Int, + val numSkippedStages: Int, + val numFailedStages: Int) + +// Q: should Tachyon size go in here as well? currently the UI only shows it on the overall storage +// page ... does anybody pay attention to it? 
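The classes in api.scala are plain, immutable data holders; JacksonMessageWriter (above) renders them with the Scala module, indented output, NON_NULL inclusion and the GMT ISO-8601 date format. The following self-contained sketch reproduces that mapper configuration outside the servlet stack; ApiJsonSketch and SampleAttempt are illustrative names only (SampleAttempt merely mirrors the shape of ApplicationAttemptInfo), and it assumes jackson-databind and jackson-module-scala are on the classpath:

```scala
import java.text.SimpleDateFormat
import java.util.{Calendar, Date, SimpleTimeZone}

import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
import com.fasterxml.jackson.module.scala.DefaultScalaModule

// Sample class for illustration only; it mirrors the shape of
// ApplicationAttemptInfo but is not the real API class.
case class SampleAttempt(
    attemptId: Option[String],
    startTime: Date,
    endTime: Date,
    sparkUser: String,
    completed: Boolean)

object ApiJsonSketch {
  def main(args: Array[String]): Unit = {
    // Mirror the JacksonMessageWriter settings: Scala support, pretty
    // printing, NON_NULL inclusion and the GMT ISO-8601 date format.
    val mapper = new ObjectMapper()
    mapper.registerModule(DefaultScalaModule)
    mapper.enable(SerializationFeature.INDENT_OUTPUT)
    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)

    val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'")
    iso8601.setCalendar(Calendar.getInstance(new SimpleTimeZone(0, "GMT")))
    mapper.setDateFormat(iso8601)

    val attempt = SampleAttempt(
      attemptId = Some("attempt-1"),
      startTime = new Date(0L),
      endTime = new Date(-1L),
      sparkUser = "someone",
      completed = false)

    // Dates are rendered in the same GMT form the REST API uses,
    // e.g. startTime prints as "1970-01-01T00:00:00.000GMT".
    println(mapper.writeValueAsString(attempt))
  }
}
```
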
+class RDDStorageInfo private[spark]( + val id: Int, + val name: String, + val numPartitions: Int, + val numCachedPartitions: Int, + val storageLevel: String, + val memoryUsed: Long, + val diskUsed: Long, + val dataDistribution: Option[Seq[RDDDataDistribution]], + val partitions: Option[Seq[RDDPartitionInfo]]) + +class RDDDataDistribution private[spark]( + val address: String, + val memoryUsed: Long, + val memoryRemaining: Long, + val diskUsed: Long) + +class RDDPartitionInfo private[spark]( + val blockName: String, + val storageLevel: String, + val memoryUsed: Long, + val diskUsed: Long, + val executors: Seq[String]) + +class StageData private[spark]( + val status: StageStatus, + val stageId: Int, + val attemptId: Int, + val numActiveTasks: Int , + val numCompleteTasks: Int, + val numFailedTasks: Int, + + val executorRunTime: Long, + + val inputBytes: Long, + val inputRecords: Long, + val outputBytes: Long, + val outputRecords: Long, + val shuffleReadBytes: Long, + val shuffleReadRecords: Long, + val shuffleWriteBytes: Long, + val shuffleWriteRecords: Long, + val memoryBytesSpilled: Long, + val diskBytesSpilled: Long, + + val name: String, + val details: String, + val schedulingPool: String, + + val accumulatorUpdates: Seq[AccumulableInfo], + val tasks: Option[Map[Long, TaskData]], + val executorSummary:Option[Map[String,ExecutorStageSummary]]) + +class TaskData private[spark]( + val taskId: Long, + val index: Int, + val attempt: Int, + val launchTime: Date, + val executorId: String, + val host: String, + val taskLocality: String, + val speculative: Boolean, + val accumulatorUpdates: Seq[AccumulableInfo], + val errorMessage: Option[String] = None, + val taskMetrics: Option[TaskMetrics] = None) + +class TaskMetrics private[spark]( + val executorDeserializeTime: Long, + val executorRunTime: Long, + val resultSize: Long, + val jvmGcTime: Long, + val resultSerializationTime: Long, + val memoryBytesSpilled: Long, + val diskBytesSpilled: Long, + val inputMetrics: Option[InputMetrics], + val outputMetrics: Option[OutputMetrics], + val shuffleReadMetrics: Option[ShuffleReadMetrics], + val shuffleWriteMetrics: Option[ShuffleWriteMetrics]) + +class InputMetrics private[spark]( + val bytesRead: Long, + val recordsRead: Long) + +class OutputMetrics private[spark]( + val bytesWritten: Long, + val recordsWritten: Long) + +class ShuffleReadMetrics private[spark]( + val remoteBlocksFetched: Int, + val localBlocksFetched: Int, + val fetchWaitTime: Long, + val remoteBytesRead: Long, + val totalBlocksFetched: Int, + val recordsRead: Long) + +class ShuffleWriteMetrics private[spark]( + val bytesWritten: Long, + val writeTime: Long, + val recordsWritten: Long) + +class TaskMetricDistributions private[spark]( + val quantiles: IndexedSeq[Double], + + val executorDeserializeTime: IndexedSeq[Double], + val executorRunTime: IndexedSeq[Double], + val resultSize: IndexedSeq[Double], + val jvmGcTime: IndexedSeq[Double], + val resultSerializationTime: IndexedSeq[Double], + val memoryBytesSpilled: IndexedSeq[Double], + val diskBytesSpilled: IndexedSeq[Double], + + val inputMetrics: Option[InputMetricDistributions], + val outputMetrics: Option[OutputMetricDistributions], + val shuffleReadMetrics: Option[ShuffleReadMetricDistributions], + val shuffleWriteMetrics: Option[ShuffleWriteMetricDistributions]) + +class InputMetricDistributions private[spark]( + val bytesRead: IndexedSeq[Double], + val recordsRead: IndexedSeq[Double]) + +class OutputMetricDistributions private[spark]( + val bytesWritten: IndexedSeq[Double], + 
val recordsWritten: IndexedSeq[Double]) + +class ShuffleReadMetricDistributions private[spark]( + val readBytes: IndexedSeq[Double], + val readRecords: IndexedSeq[Double], + val remoteBlocksFetched: IndexedSeq[Double], + val localBlocksFetched: IndexedSeq[Double], + val fetchWaitTime: IndexedSeq[Double], + val remoteBytesRead: IndexedSeq[Double], + val totalBlocksFetched: IndexedSeq[Double]) + +class ShuffleWriteMetricDistributions private[spark]( + val writeBytes: IndexedSeq[Double], + val writeRecords: IndexedSeq[Double], + val writeTime: IndexedSeq[Double]) + +class AccumulableInfo private[spark]( + val id: Long, + val name: String, + val update: Option[String], + val value: String) diff --git a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala index 7d75929b96f75..ec711480ebf30 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala @@ -25,13 +25,17 @@ import org.apache.spark.scheduler._ /** * :: DeveloperApi :: * A SparkListener that maintains executor storage status. + * + * This class is thread-safe (unlike JobProgressListener) */ @DeveloperApi class StorageStatusListener extends SparkListener { // This maintains only blocks that are cached (i.e. storage level is not StorageLevel.NONE) private[storage] val executorIdToStorageStatus = mutable.Map[String, StorageStatus]() - def storageStatusList: Seq[StorageStatus] = executorIdToStorageStatus.values.toSeq + def storageStatusList: Seq[StorageStatus] = synchronized { + executorIdToStorageStatus.values.toSeq + } /** Update storage status list to reflect updated block statuses */ private def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) { diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index a5271f0574e6c..bfe4a180e8a6f 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -17,6 +17,9 @@ package org.apache.spark.ui +import java.util.Date + +import org.apache.spark.status.api.v1.{ApplicationAttemptInfo, ApplicationInfo, JsonRootResource, UIRoot} import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext} import org.apache.spark.scheduler._ import org.apache.spark.storage.StorageStatusListener @@ -33,7 +36,7 @@ import org.apache.spark.ui.scope.RDDOperationGraphListener private[spark] class SparkUI private ( val sc: Option[SparkContext], val conf: SparkConf, - val securityManager: SecurityManager, + securityManager: SecurityManager, val environmentListener: EnvironmentListener, val storageStatusListener: StorageStatusListener, val executorsListener: ExecutorsListener, @@ -41,22 +44,27 @@ private[spark] class SparkUI private ( val storageListener: StorageListener, val operationGraphListener: RDDOperationGraphListener, var appName: String, - val basePath: String) + val basePath: String, + val startTime: Long) extends WebUI(securityManager, SparkUI.getUIPort(conf), conf, basePath, "SparkUI") - with Logging { + with Logging + with UIRoot { val killEnabled = sc.map(_.conf.getBoolean("spark.ui.killEnabled", true)).getOrElse(false) + + val stagesTab = new StagesTab(this) + /** Initialize all components of the server. 
*/ def initialize() { attachTab(new JobsTab(this)) - val stagesTab = new StagesTab(this) attachTab(stagesTab) attachTab(new StorageTab(this)) attachTab(new EnvironmentTab(this)) attachTab(new ExecutorsTab(this)) attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static")) attachHandler(createRedirectHandler("/", "/jobs", basePath = basePath)) + attachHandler(JsonRootResource.getJsonServlet(this)) // This should be POST only, but, the YARN AM proxy won't proxy POSTs attachHandler(createRedirectHandler( "/stages/stage/kill", "/stages", stagesTab.handleKillRequest, @@ -83,6 +91,24 @@ private[spark] class SparkUI private ( private[spark] def appUIHostPort = publicHostName + ":" + boundPort private[spark] def appUIAddress = s"http://$appUIHostPort" + + def getSparkUI(appId: String): Option[SparkUI] = { + if (appId == appName) Some(this) else None + } + + def getApplicationInfoList: Iterator[ApplicationInfo] = { + Iterator(new ApplicationInfo( + id = appName, + name = appName, + attempts = Seq(new ApplicationAttemptInfo( + attemptId = None, + startTime = new Date(startTime), + endTime = new Date(-1), + sparkUser = "", + completed = false + )) + )) + } } private[spark] abstract class SparkUITab(parent: SparkUI, prefix: String) @@ -109,9 +135,10 @@ private[spark] object SparkUI { listenerBus: SparkListenerBus, jobProgressListener: JobProgressListener, securityManager: SecurityManager, - appName: String): SparkUI = { + appName: String, + startTime: Long): SparkUI = { create(Some(sc), conf, listenerBus, securityManager, appName, - jobProgressListener = Some(jobProgressListener)) + jobProgressListener = Some(jobProgressListener), startTime = startTime) } def createHistoryUI( @@ -119,8 +146,9 @@ private[spark] object SparkUI { listenerBus: SparkListenerBus, securityManager: SecurityManager, appName: String, - basePath: String): SparkUI = { - create(None, conf, listenerBus, securityManager, appName, basePath) + basePath: String, + startTime: Long): SparkUI = { + create(None, conf, listenerBus, securityManager, appName, basePath, startTime = startTime) } /** @@ -137,7 +165,8 @@ private[spark] object SparkUI { securityManager: SecurityManager, appName: String, basePath: String = "", - jobProgressListener: Option[JobProgressListener] = None): SparkUI = { + jobProgressListener: Option[JobProgressListener] = None, + startTime: Long): SparkUI = { val _jobProgressListener: JobProgressListener = jobProgressListener.getOrElse { val listener = new JobProgressListener(conf) @@ -159,6 +188,6 @@ private[spark] object SparkUI { new SparkUI(sc, conf, securityManager, environmentListener, storageStatusListener, executorsListener, _jobProgressListener, storageListener, operationGraphListener, - appName, basePath) + appName, basePath, startTime) } } diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala b/core/src/main/scala/org/apache/spark/ui/WebUI.scala index f9860d1a5ce76..384f2ad26e281 100644 --- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/WebUI.scala @@ -37,7 +37,7 @@ import org.apache.spark.{Logging, SecurityManager, SparkConf} * pages. The use of tabs is optional, however; a WebUI may choose to include pages directly. 
*/ private[spark] abstract class WebUI( - securityManager: SecurityManager, + val securityManager: SecurityManager, port: Int, conf: SparkConf, basePath: String = "", @@ -77,15 +77,9 @@ private[spark] abstract class WebUI( val pagePath = "/" + page.prefix val renderHandler = createServletHandler(pagePath, (request: HttpServletRequest) => page.render(request), securityManager, basePath) - val renderJsonHandler = createServletHandler(pagePath.stripSuffix("/") + "/json", - (request: HttpServletRequest) => page.renderJson(request), securityManager, basePath) attachHandler(renderHandler) - attachHandler(renderJsonHandler) pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]()) .append(renderHandler) - pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]()) - .append(renderJsonHandler) - } /** Attach a handler to this UI. */ diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala index 956608d7c0cbe..b247e4cdc3bd4 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala @@ -22,11 +22,11 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node +import org.apache.spark.status.api.v1.ExecutorSummary import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} import org.apache.spark.util.Utils -/** Summary information about an executor to display in the UI. */ -// Needs to be private[ui] because of a false positive MiMa failure. +// This isn't even used anymore -- but we need to keep it b/c of a MiMa false positive private[ui] case class ExecutorSummaryInfo( id: String, hostPort: String, @@ -44,6 +44,7 @@ private[ui] case class ExecutorSummaryInfo( maxMemory: Long, executorLogs: Map[String, String]) + private[ui] class ExecutorsPage( parent: ExecutorsTab, threadDumpEnabled: Boolean) @@ -55,7 +56,8 @@ private[ui] class ExecutorsPage( val maxMem = storageStatusList.map(_.maxMem).sum val memUsed = storageStatusList.map(_.memUsed).sum val diskUsed = storageStatusList.map(_.diskUsed).sum - val execInfo = for (statusId <- 0 until storageStatusList.size) yield getExecInfo(statusId) + val execInfo = for (statusId <- 0 until storageStatusList.size) yield + ExecutorsPage.getExecInfo(listener, statusId) val execInfoSorted = execInfo.sortBy(_.id) val logsExist = execInfo.filter(_.executorLogs.nonEmpty).nonEmpty @@ -111,7 +113,7 @@ private[ui] class ExecutorsPage( } /** Render an HTML row representing an executor */ - private def execRow(info: ExecutorSummaryInfo, logsExist: Boolean): Seq[Node] = { + private def execRow(info: ExecutorSummary, logsExist: Boolean): Seq[Node] = { val maximumMemory = info.maxMemory val memoryUsed = info.memoryUsed val diskUsed = info.diskUsed @@ -170,8 +172,11 @@ private[ui] class ExecutorsPage( </tr> } +} + +private[spark] object ExecutorsPage { /** Represent an executor's info as a map given a storage status index */ - private def getExecInfo(statusId: Int): ExecutorSummaryInfo = { + def getExecInfo(listener: ExecutorsListener, statusId: Int): ExecutorSummary = { val status = listener.storageStatusList(statusId) val execId = status.blockManagerId.executorId val hostPort = status.blockManagerId.hostPort @@ -189,7 +194,7 @@ private[ui] class ExecutorsPage( val totalShuffleWrite = listener.executorToShuffleWrite.getOrElse(execId, 0L) val executorLogs = listener.executorToLogUrls.getOrElse(execId, Map.empty) - new ExecutorSummaryInfo( + new ExecutorSummary( 
execId, hostPort, rddBlocks, diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index f6abf27db49dd..09323d1d80ad6 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -271,6 +271,12 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { val shouldShowCompletedJobs = completedJobs.nonEmpty val shouldShowFailedJobs = failedJobs.nonEmpty + val completedJobNumStr = if (completedJobs.size == listener.numCompletedJobs) { + s"${completedJobs.size}" + } else { + s"${listener.numCompletedJobs}, only showing ${completedJobs.size}" + } + val summary: NodeSeq = <div> <ul class="unstyled"> @@ -295,9 +301,9 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { } { if (shouldShowCompletedJobs) { - <li> + <li id="completed-summary"> <a href="#completed"><strong>Completed Jobs:</strong></a> - {completedJobs.size} + {completedJobNumStr} </li> } } @@ -305,7 +311,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { if (shouldShowFailedJobs) { <li> <a href="#failed"><strong>Failed Jobs:</strong></a> - {failedJobs.size} + {listener.numFailedJobs} </li> } } @@ -322,7 +328,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { activeJobsTable } if (shouldShowCompletedJobs) { - content ++= <h4 id="completed">Completed Jobs ({completedJobs.size})</h4> ++ + content ++= <h4 id="completed">Completed Jobs ({completedJobNumStr})</h4> ++ completedJobsTable } if (shouldShowFailedJobs) { diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala index 236bc8ea92879..a37f739ab9c66 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala @@ -64,6 +64,12 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { val shouldShowCompletedStages = completedStages.nonEmpty val shouldShowFailedStages = failedStages.nonEmpty + val completedStageNumStr = if (numCompletedStages == completedStages.size) { + s"$numCompletedStages" + } else { + s"$numCompletedStages, only showing ${completedStages.size}" + } + val summary: NodeSeq = <div> <ul class="unstyled"> @@ -98,9 +104,9 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { } { if (shouldShowCompletedStages) { - <li> + <li id="completed-summary"> <a href="#completed"><strong>Completed Stages:</strong></a> - {numCompletedStages} + {completedStageNumStr} </li> } } @@ -132,7 +138,7 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { pendingStagesTable.toNodeSeq } if (shouldShowCompletedStages) { - content ++= <h4 id="completed">Completed Stages ({numCompletedStages})</h4> ++ + content ++= <h4 id="completed">Completed Stages ({completedStageNumStr})</h4> ++ completedStagesTable.toNodeSeq } if (shouldShowFailedStages) { diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index 96cc3d78d0f15..7163217e1fed0 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -187,7 +187,7 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { val jobDataOption = listener.jobIdToData.get(jobId) if 
(jobDataOption.isEmpty) {
       val content =
-        <div>
+        <div id="no-info">
           <p>No information to display for job {jobId}</p>
         </div>
       return UIUtils.headerSparkPage(
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
index 8f9aa9fdec819..246e191d64776 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
@@ -74,6 +74,8 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
   // JobProgressListener's retention limits.
   var numCompletedStages = 0
   var numFailedStages = 0
+  var numCompletedJobs = 0
+  var numFailedJobs = 0
 
   // Misc:
   val executorIdToBlockManagerId = HashMap[ExecutorId, BlockManagerId]()
@@ -217,10 +219,12 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
         completedJobs += jobData
         trimJobsIfNecessary(completedJobs)
         jobData.status = JobExecutionStatus.SUCCEEDED
+        numCompletedJobs += 1
       case JobFailed(exception) =>
         failedJobs += jobData
         trimJobsIfNecessary(failedJobs)
         jobData.status = JobExecutionStatus.FAILED
+        numFailedJobs += 1
     }
     for (stageId <- jobData.stageIds) {
       stageIdToActiveJobIds.get(stageId).foreach { jobsUsingStage =>
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
index d725b9d8565ac..f3e0b38523f32 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala
@@ -21,7 +21,7 @@ import javax.servlet.http.HttpServletRequest
 
 import scala.xml.Node
 
-import org.apache.spark.scheduler.{Schedulable, StageInfo}
+import org.apache.spark.scheduler.StageInfo
 import org.apache.spark.ui.{WebUIPage, UIUtils}
 
 /** Page showing specific pool details */
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index 6c4305873cbd9..b01fad8e453c8 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -25,11 +25,11 @@ import scala.xml.{Elem, Node, Unparsed}
 import org.apache.commons.lang3.StringEscapeUtils
 
 import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
 import org.apache.spark.ui.{ToolTips, WebUIPage, UIUtils}
 import org.apache.spark.ui.jobs.UIData._
 import org.apache.spark.ui.scope.RDDOperationGraph
 import org.apache.spark.util.{Utils, Distribution}
-import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
 
 /** Page showing statistics and task list for a given stage */
 private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
@@ -52,14 +52,22 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
     val stageAttemptId = parameterAttempt.toInt
     val stageDataOption = progressListener.stageIdToData.get((stageId, stageAttemptId))
 
-    if (stageDataOption.isEmpty || stageDataOption.get.taskData.isEmpty) {
+    val stageHeader = s"Details for Stage $stageId (Attempt $stageAttemptId)"
+    if (stageDataOption.isEmpty) {
+      val content =
+        <div id="no-info">
+          <p>No information to display for Stage {stageId} (Attempt {stageAttemptId})</p>
+        </div>
+      return UIUtils.headerSparkPage(stageHeader, content, parent)
+
+    }
+    if (stageDataOption.get.taskData.isEmpty) {
       val content =
         <div>
           <h4>Summary Metrics</h4> No tasks have started yet
           <h4>Tasks</h4> No tasks have started yet
         </div>
-      return UIUtils.headerSparkPage(
-        s"Details for Stage $stageId (Attempt $stageAttemptId)", content, parent)
+      return UIUtils.headerSparkPage(stageHeader, content, parent)
     }
 
     val stageData = stageDataOption.get
@@ -458,8 +466,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
       maybeAccumulableTable ++
       <h4>Tasks</h4> ++ taskTable
 
-    UIUtils.headerSparkPage(
-      "Details for Stage %d".format(stageId), content, parent, showVisualization = true)
+    UIUtils.headerSparkPage(stageHeader, content, parent, showVisualization = true)
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
index 199f731b92bcc..05f94a7507f4f 100644
--- a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala
@@ -21,8 +21,8 @@ import javax.servlet.http.HttpServletRequest
 
 import scala.xml.Node
 
-import org.apache.spark.storage.{BlockId, BlockStatus, StorageStatus, StorageUtils}
-import org.apache.spark.ui.{WebUIPage, UIUtils}
+import org.apache.spark.status.api.v1.{AllRDDResource, RDDDataDistribution, RDDPartitionInfo}
+import org.apache.spark.ui.{UIUtils, WebUIPage}
 import org.apache.spark.util.Utils
 
 /** Page showing storage details for a given RDD */
@@ -32,28 +32,19 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
   def render(request: HttpServletRequest): Seq[Node] = {
     val parameterId = request.getParameter("id")
     require(parameterId != null && parameterId.nonEmpty, "Missing id parameter")
-
     val rddId = parameterId.toInt
-    val storageStatusList = listener.storageStatusList
-    val rddInfo = listener.rddInfoList.find(_.id == rddId).getOrElse {
-      // Rather than crashing, render an "RDD Not Found" page
-      return UIUtils.headerSparkPage("RDD Not Found", Seq[Node](), parent)
-    }
+    val rddStorageInfo = AllRDDResource.getRDDStorageInfo(rddId, listener,includeDetails = true)
+      .getOrElse {
+        // Rather than crashing, render an "RDD Not Found" page
+        return UIUtils.headerSparkPage("RDD Not Found", Seq[Node](), parent)
+      }
 
     // Worker table
-    val workers = storageStatusList.map((rddId, _))
-    val workerTable = UIUtils.listingTable(workerHeader, workerRow, workers,
-      id = Some("rdd-storage-by-worker-table"))
+    val workerTable = UIUtils.listingTable(workerHeader, workerRow,
+      rddStorageInfo.dataDistribution.get, id = Some("rdd-storage-by-worker-table"))
 
     // Block table
-    val blockLocations = StorageUtils.getRddBlockLocations(rddId, storageStatusList)
-    val blocks = storageStatusList
-      .flatMap(_.rddBlocksById(rddId))
-      .sortWith(_._1.name < _._1.name)
-      .map { case (blockId, status) =>
-        (blockId, status, blockLocations.get(blockId).getOrElse(Seq[String]("Unknown")))
-      }
-    val blockTable = UIUtils.listingTable(blockHeader, blockRow, blocks,
+    val blockTable = UIUtils.listingTable(blockHeader, blockRow, rddStorageInfo.partitions.get,
       id = Some("rdd-storage-by-block-table"))
 
     val content =
@@ -62,23 +53,23 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
          <ul class="unstyled">
            <li>
              <strong>Storage Level:</strong>
-              {rddInfo.storageLevel.description}
+              {rddStorageInfo.storageLevel}
            </li>
            <li>
              <strong>Cached Partitions:</strong>
-              {rddInfo.numCachedPartitions}
+              {rddStorageInfo.numCachedPartitions}
            </li>
            <li>
              <strong>Total Partitions:</strong>
-              {rddInfo.numPartitions}
+              {rddStorageInfo.numPartitions}
            </li>
            <li>
              <strong>Memory Size:</strong>
-              {Utils.bytesToString(rddInfo.memSize)}
+              {Utils.bytesToString(rddStorageInfo.memoryUsed)}
            </li>
            <li>
              <strong>Disk Size:</strong>
-              {Utils.bytesToString(rddInfo.diskSize)}
+              {Utils.bytesToString(rddStorageInfo.diskUsed)}
            </li>
          </ul>
        </div>
@@ -86,19 +77,19 @@
 
      <div class="row-fluid">
        <div class="span12">
-          <h4> Data Distribution on {workers.size} Executors </h4>
+          <h4> Data Distribution on {rddStorageInfo.dataDistribution.size} Executors </h4>
          {workerTable}
        </div>
      </div>
 
      <div class="row-fluid">
        <div class="span12">
-          <h4> {blocks.size} Partitions </h4>
+          <h4> {rddStorageInfo.partitions.size} Partitions </h4>
          {blockTable}
        </div>
      </div>;
 
-    UIUtils.headerSparkPage("RDD Storage Info for " + rddInfo.name, content, parent)
+    UIUtils.headerSparkPage("RDD Storage Info for " + rddStorageInfo.name, content, parent)
   }
 
   /** Header fields for the worker table */
@@ -116,34 +107,32 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
     "Executors")
 
   /** Render an HTML row representing a worker */
-  private def workerRow(worker: (Int, StorageStatus)): Seq[Node] = {
-    val (rddId, status) = worker
+  private def workerRow(worker: RDDDataDistribution): Seq[Node] = {
     <tr>
-      <td>{status.blockManagerId.host + ":" + status.blockManagerId.port}</td>
+      <td>{worker.address}</td>
      <td>
-        {Utils.bytesToString(status.memUsedByRdd(rddId))}
-        ({Utils.bytesToString(status.memRemaining)} Remaining)
+        {Utils.bytesToString(worker.memoryUsed)}
+        ({Utils.bytesToString(worker.memoryRemaining)} Remaining)
      </td>
-      <td>{Utils.bytesToString(status.diskUsedByRdd(rddId))}</td>
+      <td>{Utils.bytesToString(worker.diskUsed)}</td>
    </tr>
   }
 
   /** Render an HTML row representing a block */
-  private def blockRow(row: (BlockId, BlockStatus, Seq[String])): Seq[Node] = {
-    val (id, block, locations) = row
+  private def blockRow(row: RDDPartitionInfo): Seq[Node] = {
    <tr>
-      <td>{id}</td>
+      <td>{row.blockName}</td>
      <td>
-        {block.storageLevel.description}
+        {row.storageLevel}
      </td>
-      <td sorttable_customkey={block.memSize.toString}>
-        {Utils.bytesToString(block.memSize)}
+      <td sorttable_customkey={row.memoryUsed.toString}>
+        {Utils.bytesToString(row.memoryUsed)}
      </td>
-      <td sorttable_customkey={block.diskSize.toString}>
-        {Utils.bytesToString(block.diskSize)}
+      <td sorttable_customkey={row.diskUsed.toString}>
+        {Utils.bytesToString(row.diskUsed)}
      </td>
      <td>
-        {locations.map(l => <span>{l}<br/></span>)}
+        {row.executors.map(l => <span>{l}<br/></span>)}
      </td>
    </tr>
   }
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala
index 59dc6b547c1d8..07db783c572cf 100644
--- a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala
@@ -22,7 +22,7 @@ import javax.servlet.http.HttpServletRequest
 import scala.xml.Node
 
 import org.apache.spark.storage.RDDInfo
-import org.apache.spark.ui.{WebUIPage, UIUtils}
+import org.apache.spark.ui.{UIUtils, WebUIPage}
 import org.apache.spark.util.Utils
 
 /** Page showing list of RDD's currently stored in the cluster */
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala
index 045bd784990d1..0351749700962 100644
--- a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala
@@ -35,6 +35,8 @@ private[ui]
class StorageTab(parent: SparkUI) extends SparkUITab(parent, "storag /** * :: DeveloperApi :: * A SparkListener that prepares information to be displayed on the BlockManagerUI. + * + * This class is thread-safe (unlike JobProgressListener) */ @DeveloperApi class StorageListener(storageStatusListener: StorageStatusListener) extends SparkListener { @@ -43,7 +45,9 @@ class StorageListener(storageStatusListener: StorageStatusListener) extends Spar def storageStatusList: Seq[StorageStatus] = storageStatusListener.storageStatusList /** Filter RDD info to include only those with cached partitions */ - def rddInfoList: Seq[RDDInfo] = _rddInfoMap.values.filter(_.numCachedPartitions > 0).toSeq + def rddInfoList: Seq[RDDInfo] = synchronized { + _rddInfoMap.values.filter(_.numCachedPartitions > 0).toSeq + } /** Update the storage info of the RDDs whose blocks are among the given updated blocks */ private def updateRDDInfo(updatedBlocks: Seq[(BlockId, BlockStatus)]): Unit = { diff --git a/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json new file mode 100644 index 0000000000000..ce4fe80b66aa5 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json @@ -0,0 +1,53 @@ +[ { + "id" : "local-1430917381534", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-05-06T13:03:00.893GMT", + "endTime" : "2015-05-06T13:03:11.398GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1425081759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-28T00:02:38.277GMT", + "endTime" : "2015-02-28T00:02:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json new file mode 100644 index 0000000000000..31ac9beea8788 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json @@ -0,0 +1,67 @@ +[ { + "status" : "COMPLETE", + "stageId" : 3, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 162, + "inputBytes" : 160, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at <console>:17", + 
"details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line19.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line19.$read$$iwC$$iwC.<init>(<console>:24)\n$line19.$read$$iwC.<init>(<console>:26)\n$line19.$read.<init>(<console>:28)\n$line19.$read$.<init>(<console>:32)\n$line19.$read$.<clinit>(<console>)\n$line19.$eval$.<init>(<console>:7)\n$line19.$eval$.<clinit>(<console>)\n$line19.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at <console>:14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 4338, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at <console>:15", + "details" : 
"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json new file mode 100644 index 0000000000000..ce4fe80b66aa5 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json @@ -0,0 +1,53 @@ +[ { + "id" : "local-1430917381534", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-05-06T13:03:00.893GMT", + "endTime" : "2015-05-06T13:03:11.398GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1425081759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-28T00:02:38.277GMT", + "endTime" : "2015-02-28T00:02:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json new file mode 100644 index 0000000000000..cb622e147249e --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json @@ -0,0 +1,17 @@ +[ { + "id" : "<driver>", + "hostPort" : "localhost:57971", + "rddBlocks" : 8, + "memoryUsed" : 28000128, + "diskUsed" : 0, + "activeTasks" : 0, + "failedTasks" : 1, + "completedTasks" : 31, + "totalTasks" : 32, + "totalDuration" : 8820, + "totalInputBytes" : 28000288, + 
"totalShuffleRead" : 0, + "totalShuffleWrite" : 13180, + "maxMemory" : 278302556, + "executorLogs" : { } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json new file mode 100644 index 0000000000000..bff6a4f69d077 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json @@ -0,0 +1,23 @@ +[ { + "status" : "FAILED", + "stageId" : 2, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 7, + "numFailedTasks" : 1, + "executorRunTime" : 278, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at <console>:20", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:25)\n$line11.$read$$iwC$$iwC.<init>(<console>:27)\n$line11.$read$$iwC.<init>(<console>:29)\n$line11.$read.<init>(<console>:31)\n$line11.$read$.<init>(<console>:35)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json new file mode 100644 index 0000000000000..2e92e1fa0ec23 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json @@ -0,0 +1,15 @@ +[ { + "jobId" : 0, + "name" : "foreach at <console>:15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json new file mode 100644 index 0000000000000..2e92e1fa0ec23 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json @@ -0,0 +1,15 @@ +[ { + "jobId" : 0, + "name" : "foreach at <console>:15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 
8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json new file mode 100644 index 0000000000000..cab4750270dfa --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json @@ -0,0 +1,43 @@ +[ { + "jobId" : 2, + "name" : "count at <console>:17", + "stageIds" : [ 3 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +}, { + "jobId" : 1, + "name" : "count at <console>:20", + "stageIds" : [ 1, 2 ], + "status" : "FAILED", + "numTasks" : 16, + "numActiveTasks" : 0, + "numCompletedTasks" : 15, + "numSkippedTasks" : 15, + "numFailedTasks" : 1, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 1 +}, { + "jobId" : 0, + "name" : "count at <console>:15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json new file mode 100644 index 0000000000000..483632a3956ed --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json @@ -0,0 +1,10 @@ +[ { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json new file mode 100644 index 0000000000000..4b85690fd9199 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json @@ -0,0 +1,19 @@ +[ { + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json new file mode 100644 index 0000000000000..dca86fe5f7e6a --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json @@ -0,0 +1,35 @@ +[ { + "id" : "local-1430917381534", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-05-06T13:03:00.893GMT", + 
"endTime" : "2015-05-06T13:03:11.398GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1425081759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-28T00:02:38.277GMT", + "endTime" : "2015-02-28T00:02:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json new file mode 100644 index 0000000000000..07489ad96414a --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json @@ -0,0 +1,10 @@ +{ + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json new file mode 100644 index 0000000000000..8f3d7160c723f --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json @@ -0,0 +1,17 @@ +{ + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json new file mode 100644 index 0000000000000..4a29072bdb6e4 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json @@ -0,0 +1,15 @@ +{ + "jobId" : 0, + "name" : "count at <console>:15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json new file mode 100644 index 0000000000000..38b5328ffbb03 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json @@ -0,0 +1,64 @@ +{ + "id" : 0, + "name" : "0", + "numPartitions" : 8, + "numCachedPartitions" : 8, + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 28000128, + "diskUsed" : 0, + "dataDistribution" : [ { + "address" : "localhost:57971", + "memoryUsed" : 28000128, + 
"memoryRemaining" : 250302428, + "diskUsed" : 0 + } ], + "partitions" : [ { + "blockName" : "rdd_0_0", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_1", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_2", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_3", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_4", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_5", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_6", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_7", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + } ] +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json new file mode 100644 index 0000000000000..111cb8163eb3d --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -0,0 +1,270 @@ +{ + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at <console>:14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ], + "tasks" : { + "8" : { + "taskId" : 8, + "index" : 0, + "attempt" : 
0, + "launchTime" : "2015-02-03T16:43:05.829GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 94000, + "recordsWritten" : 0 + } + } + }, + "11" : { + "taskId" : 11, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1647, + "writeTime" : 83000, + "recordsWritten" : 0 + } + } + }, + "14" : { + "taskId" : 14, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.832GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 88000, + "recordsWritten" : 0 + } + } + }, + "13" : { + "taskId" : 13, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 73000, + "recordsWritten" : 0 + } + } + }, + "10" : { + "taskId" : 10, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 76000, + "recordsWritten" : 0 + } + } + }, + "9" : { + "taskId" : 9, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 436, + "resultSize" : 1902, + "jvmGcTime" : 19, + 
"resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 98000, + "recordsWritten" : 0 + } + } + }, + "12" : { + "taskId" : 12, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1645, + "writeTime" : 101000, + "recordsWritten" : 0 + } + } + }, + "15" : { + "taskId" : 15, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.833GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 79000, + "recordsWritten" : 0 + } + } + } + }, + "executorSummary" : { + "<driver>" : { + "taskTime" : 3624, + "failedTasks" : 0, + "succeededTasks" : 8, + "inputBytes" : 28000128, + "outputBytes" : 0, + "shuffleRead" : 0, + "shuffleWrite" : 13180, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json new file mode 100644 index 0000000000000..ef339f89afa45 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -0,0 +1,270 @@ +[ { + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at <console>:14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ], + "tasks" : { + "8" : { + "taskId" : 8, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.829GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 94000, + "recordsWritten" : 0 + } + } + }, + "11" : { + "taskId" : 11, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1647, + "writeTime" : 83000, + "recordsWritten" : 0 + } + } + }, + "14" : { + "taskId" : 14, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.832GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 88000, + "recordsWritten" : 0 + } + } + }, + "13" : { + "taskId" : 13, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 73000, + "recordsWritten" : 0 + } + } + }, + "10" : { + "taskId" : 10, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" 
: 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 76000, + "recordsWritten" : 0 + } + } + }, + "9" : { + "taskId" : 9, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 436, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 98000, + "recordsWritten" : 0 + } + } + }, + "12" : { + "taskId" : 12, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1645, + "writeTime" : 101000, + "recordsWritten" : 0 + } + } + }, + "15" : { + "taskId" : 15, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.833GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 79000, + "recordsWritten" : 0 + } + } + } + }, + "executorSummary" : { + "<driver>" : { + "taskTime" : 3624, + "failedTasks" : 0, + "succeededTasks" : 8, + "inputBytes" : 28000128, + "outputBytes" : 0, + "shuffleRead" : 0, + "shuffleWrite" : 13180, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json new file mode 100644 index 0000000000000..f79a31022d214 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json @@ -0,0 +1,9 @@ +[ { + "id" : 0, + "name" : "0", + "numPartitions" : 8, + "numCachedPartitions" : 8, + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 28000128, + "diskUsed" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json new file mode 100644 index 0000000000000..8878e547a7984 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json @@ -0,0 +1 @@ +[ ] \ No newline at end of file diff 
--git a/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json new file mode 100644 index 0000000000000..056fac7088594 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json @@ -0,0 +1,89 @@ +[ { + "status" : "COMPLETE", + "stageId" : 3, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 162, + "inputBytes" : 160, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at <console>:17", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line19.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line19.$read$$iwC$$iwC.<init>(<console>:24)\n$line19.$read$$iwC.<init>(<console>:26)\n$line19.$read.<init>(<console>:28)\n$line19.$read$.<init>(<console>:32)\n$line19.$read$.<clinit>(<console>)\n$line19.$eval$.<init>(<console>:7)\n$line19.$eval$.<clinit>(<console>)\n$line19.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at <console>:14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" 
: "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 4338, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at <console>:15", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "FAILED", + "stageId" : 2, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 7, + "numFailedTasks" : 1, + "executorRunTime" : 278, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at <console>:20", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:25)\n$line11.$read$$iwC$$iwC.<init>(<console>:27)\n$line11.$read$$iwC.<init>(<console>:29)\n$line11.$read.<init>(<console>:31)\n$line11.$read$.<init>(<console>:35)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json new file mode 100644 index 
0000000000000..79ccacd309693 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json @@ -0,0 +1,27 @@ +[ { + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 120, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "foreach at <console>:15", + "details" : "org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "value" : "5050" + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json new file mode 100644 index 0000000000000..f2cb29b31c85f --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.494GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 49294, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3842811, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.502GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3934399, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, 
+ "launchTime" : "2015-05-06T13:03:06.503GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 348, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89885, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 1311694, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 83022, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 30, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3675510, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 29, + "executorRunTime" : 351, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 4016617, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.506GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + 
"diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 2579051, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 8, + "index" : 8, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.914GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 80, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 121551, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 9, + "index" : 9, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.915GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60489, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 10, + "index" : 10, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.916GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 8, + "executorRunTime" : 73, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94709, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 11, + "index" : 11, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.918GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 75, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94507, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : 
"PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 15, + "index" : 15, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.928GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 602780, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : 
{ + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json new file mode 100644 index 0000000000000..c3febc5fc9447 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json @@ -0,0 +1,193 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.515GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "78", + "value" : "5050" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.521GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "247", + "value" : "2175" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "378", + "value" : "378" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "572", + "value" : "950" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" 
: false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "678", + "value" : "2853" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "897", + "value" : "3750" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "978", + "value" : "1928" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.524GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "1222", + "value" : "4972" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json new file mode 100644 index 0000000000000..56d667d88917c --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json @@ -0,0 +1,193 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.515GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "78", + "value" : "5050" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.521GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "247", + "value" : "2175" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" 
: 0, + "launchTime" : "2015-03-17T23:12:16.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "378", + "value" : "378" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "572", + "value" : "950" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "678", + "value" : "2853" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.523GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "897", + "value" : "3750" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.523GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "978", + "value" : "1928" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.524GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "1222", + "value" : "4972" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json new file mode 100644 index 0000000000000..e5ec3bc4c7126 --- /dev/null +++ 
b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json @@ -0,0 +1,1401 @@ +[ { + "taskId" : 10, + "index" : 10, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.916GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 8, + "executorRunTime" : 73, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94709, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 11, + "index" : 11, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.918GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 75, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94507, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 15, + "index" : 15, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.928GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" 
: { + "executorDeserializeTime" : 3, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 602780, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 20, + "index" : 20, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.014GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97716, + "recordsWritten" : 10 + } + } +}, { 
+ "taskId" : 21, + "index" : 21, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.015GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 88, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100270, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 22, + "index" : 22, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.018GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 93, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 143427, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 23, + "index" : 23, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.031GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 65, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 91844, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 24, + "index" : 24, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.098GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 43, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 157194, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 25, + "index" : 25, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.103GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 49, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94134, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 26, + "index" : 26, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.105GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 38, + "resultSize" : 2065, + "jvmGcTime" : 0, + 
"resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108213, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 27, + "index" : 27, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.110GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 32, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102019, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 28, + "index" : 28, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.113GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 29, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 104299, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 29, + "index" : 29, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.114GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 39, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 114938, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 30, + "index" : 30, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.118GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 34, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 119770, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 31, + "index" : 31, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.127GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 36, + "executorRunTime" : 24, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92619, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 32, + "index" : 32, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.148GMT", + 
"executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89603, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 33, + "index" : 33, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.149GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 43, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 118329, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 34, + "index" : 34, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.156GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 27, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 127746, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 35, + "index" : 35, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.161GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 35, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 160963, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 36, + "index" : 36, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.164GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 29, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 123855, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 37, + "index" : 37, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.165GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 32, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + 
"bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 111869, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 38, + "index" : 38, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.166GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 31, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 131158, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 39, + "index" : 39, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.180GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98748, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 40, + "index" : 40, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.197GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 14, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94792, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 41, + "index" : 41, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.200GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90765, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 42, + "index" : 42, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.203GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 103713, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 43, + "index" : 43, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.204GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + 
"accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 171516, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 44, + "index" : 44, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.205GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98293, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 45, + "index" : 45, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.206GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 19, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92985, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 46, + "index" : 46, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.210GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 31, + "resultSize" : 2065, + "jvmGcTime" : 6, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 113322, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 47, + "index" : 47, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.212GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 103015, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 48, + "index" : 48, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.220GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 24, + "resultSize" : 2065, + "jvmGcTime" : 6, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 
139844, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 49, + "index" : 49, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.223GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 7, + "executorRunTime" : 23, + "resultSize" : 2065, + "jvmGcTime" : 6, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94984, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 50, + "index" : 50, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.240GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 51, + "index" : 51, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.242GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96013, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 52, + "index" : 52, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.243GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 53, + "index" : 53, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.244GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92835, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 54, + "index" : 54, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.244GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 18, + "resultSize" : 
2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90506, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 55, + "index" : 55, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.246GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 21, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108309, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 56, + "index" : 56, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.249GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 20, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90329, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 57, + "index" : 57, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.257GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96849, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 58, + "index" : 58, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.263GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97521, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 59, + "index" : 59, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.265GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100753, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git 
a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json new file mode 100644 index 0000000000000..5657123a2db15 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 29, + "executorRunTime" : 351, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 4016617, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 30, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3675510, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.502GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3934399, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 83022, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.506GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 2579051, + "recordsWritten" : 10 + } + } 
+}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 1311694, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.494GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 49294, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3842811, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.503GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 348, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89885, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 22, + "index" : 22, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.018GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 93, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 143427, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + 
"resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 21, + "index" : 21, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.015GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 88, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100270, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 9, + "index" : 9, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.915GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60489, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 20, + "index" : 20, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.014GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97716, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + 
"executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 8, + "index" : 8, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.914GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 80, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 121551, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json new file mode 100644 index 0000000000000..5657123a2db15 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 29, + "executorRunTime" : 351, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 4016617, + "recordsWritten" : 10 + } + } 
+}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 30, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3675510, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.502GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3934399, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 83022, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.506GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 2579051, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 1311694, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.494GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + 
"resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 49294, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3842811, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.503GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 348, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89885, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 22, + "index" : 22, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.018GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 93, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 143427, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 21, + "index" : 21, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.015GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 88, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100270, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + 
"executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 9, + "index" : 9, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.915GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60489, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 20, + "index" : 20, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.014GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97716, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 8, + "index" : 8, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.914GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 80, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 
60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 121551, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json new file mode 100644 index 0000000000000..72fe017e9f85d --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 40, + "index" : 40, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.197GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 14, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94792, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 86, + "index" : 86, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.374GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95848, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 41, + "index" : 41, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.200GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + 
"resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90765, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 68, + "index" : 68, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.306GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101750, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 58, + "index" : 58, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.263GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97521, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 43, + "index" : 43, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.204GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 171516, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 57, + "index" : 57, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.257GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96849, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 59, + "index" : 59, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.265GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100753, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 32, + "index" : 32, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.148GMT", + 
"executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89603, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 87, + "index" : 87, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.374GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102159, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 99, + "index" : 99, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.426GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70565, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 133964, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 63, + "index" : 63, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.276GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 20, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102779, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 90, + "index" : 90, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.385GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98472, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 39, + "index" : 39, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.180GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + 
"bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98748, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 42, + "index" : 42, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.203GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 103713, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 51, + "index" : 51, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.242GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96013, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 50, + "index" : 50, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.240GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 53, + "index" : 53, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.244GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92835, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 44, + "index" : 44, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.205GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98293, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 80, + "index" : 80, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.341GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + 
"accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98069, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json new file mode 100644 index 0000000000000..bc3c302813de2 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json @@ -0,0 +1,19 @@ +{ + "quantiles" : [ 0.01, 0.5, 0.99 ], + "executorDeserializeTime" : [ 1.0, 3.0, 36.0 ], + "executorRunTime" : [ 16.0, 28.0, 351.0 ], + "resultSize" : [ 2010.0, 2065.0, 2065.0 ], + "jvmGcTime" : [ 0.0, 0.0, 7.0 ], + "resultSerializationTime" : [ 0.0, 0.0, 2.0 ], + "memoryBytesSpilled" : [ 0.0, 0.0, 0.0 ], + "diskBytesSpilled" : [ 0.0, 0.0, 0.0 ], + "inputMetrics" : { + "bytesRead" : [ 60488.0, 70564.0, 70565.0 ], + "recordsRead" : [ 10000.0, 10000.0, 10000.0 ] + }, + "shuffleWriteMetrics" : { + "writeBytes" : [ 1710.0, 1710.0, 1710.0 ], + "writeRecords" : [ 10.0, 10.0, 10.0 ], + "writeTime" : [ 89437.0, 102159.0, 4016617.0 ] + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json new file mode 100644 index 0000000000000..e084c839f1d5a --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json @@ -0,0 +1,19 @@ +{ + "quantiles" : [ 0.05, 0.25, 0.5, 0.75, 0.95 ], + "executorDeserializeTime" : [ 1.0, 2.0, 2.0, 2.0, 3.0 ], + "executorRunTime" : [ 30.0, 74.0, 75.0, 76.0, 79.0 ], + "resultSize" : [ 1034.0, 1034.0, 1034.0, 1034.0, 1034.0 ], + "jvmGcTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "resultSerializationTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "memoryBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "shuffleReadMetrics" : { + "readBytes" : [ 17100.0, 17100.0, 17100.0, 17100.0, 17100.0 ], + "readRecords" : [ 100.0, 100.0, 100.0, 100.0, 100.0 ], + "remoteBlocksFetched" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "localBlocksFetched" : [ 100.0, 100.0, 100.0, 100.0, 100.0 ], + "fetchWaitTime" : [ 0.0, 0.0, 0.0, 1.0, 1.0 ], + "remoteBytesRead" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "totalBlocksFetched" : [ 100.0, 100.0, 100.0, 100.0, 100.0 ] + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json new file mode 100644 index 0000000000000..6ac7811ce691b --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json @@ -0,0 +1,19 @@ +{ + "quantiles" : [ 0.05, 0.25, 0.5, 0.75, 0.95 ], + "executorDeserializeTime" : [ 2.0, 2.0, 3.0, 7.0, 31.0 ], + "executorRunTime" : [ 16.0, 18.0, 28.0, 49.0, 349.0 ], + "resultSize" : [ 2010.0, 2065.0, 2065.0, 2065.0, 2065.0 ], + "jvmGcTime" : [ 0.0, 0.0, 0.0, 5.0, 7.0 ], + 
"resultSerializationTime" : [ 0.0, 0.0, 0.0, 0.0, 1.0 ], + "memoryBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "inputMetrics" : { + "bytesRead" : [ 60488.0, 70564.0, 70564.0, 70564.0, 70564.0 ], + "recordsRead" : [ 10000.0, 10000.0, 10000.0, 10000.0, 10000.0 ] + }, + "shuffleWriteMetrics" : { + "writeBytes" : [ 1710.0, 1710.0, 1710.0, 1710.0, 1710.0 ], + "writeRecords" : [ 10.0, 10.0, 10.0, 10.0, 10.0 ], + "writeTime" : [ 90329.0, 95848.0, 102159.0, 121551.0, 2579051.0 ] + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json new file mode 100644 index 0000000000000..32d5731676ad5 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -0,0 +1,242 @@ +{ + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 120, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "foreach at <console>:15", + "details" : "org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "value" : "5050" + } ], + "tasks" : { + "2" : { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "378", + "value" : "378" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "5" : { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "897", + "value" : "3750" + } ], + "taskMetrics" : { + 
"executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "4" : { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "678", + "value" : "2853" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "7" : { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.524GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "1222", + "value" : "4972" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "1" : { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.521GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "247", + "value" : "2175" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "3" : { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "572", + "value" : "950" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "6" : { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "978", + "value" : "1928" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "0" : { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.515GMT", + "executorId" : "<driver>", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "78", + "value" : "5050" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } + }, + "executorSummary" : { + "<driver>" : { + "taskTime" : 418, + 
"failedTasks" : 0, + "succeededTasks" : 8, + "inputBytes" : 0, + "outputBytes" : 0, + "shuffleRead" : 0, + "shuffleWrite" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json new file mode 100644 index 0000000000000..cab4750270dfa --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json @@ -0,0 +1,43 @@ +[ { + "jobId" : 2, + "name" : "count at <console>:17", + "stageIds" : [ 3 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +}, { + "jobId" : 1, + "name" : "count at <console>:20", + "stageIds" : [ 1, 2 ], + "status" : "FAILED", + "numTasks" : 16, + "numActiveTasks" : 0, + "numCompletedTasks" : 15, + "numSkippedTasks" : 15, + "numFailedTasks" : 1, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 1 +}, { + "jobId" : 0, + "name" : "count at <console>:15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json new file mode 100644 index 0000000000000..6fd25befbf7e8 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json @@ -0,0 +1,29 @@ +[ { + "jobId" : 2, + "name" : "count at <console>:17", + "stageIds" : [ 3 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +}, { + "jobId" : 0, + "name" : "count at <console>:15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/spark-events/local-1422981759269/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1422981759269/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 new file mode 100755 index 0000000000000..4794e56d1107a --- /dev/null +++ b/core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 @@ -0,0 +1,88 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"<driver>","Host":"localhost","Port":57967},"Maximum Memory":278302556,"Timestamp":1422981759407} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java 
Home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","Java Version":"1.7.0_67 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.103","spark.eventLog.enabled":"true","spark.driver.port":"57965","spark.repl.class.uri":"http://192.168.1.103:57964","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.103:57966","spark.tachyonStore.folderName":"spark-fd6c823a-8a18-4113-8306-1fa7bb623a7f","spark.app.id":"local-1422981759269"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.7","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle Corporation","java.vm.specification.version":"1.7","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"24.65-b04","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.7.0_67-b01","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"51.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS 
X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","java.version":"1.7.0_67","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1422981759269","Timestamp":1422981758277,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981762075,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981762082,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981762083,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981762085,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762632,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_6","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762633,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":520,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_2","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981762082,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_3","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_5","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981762083,"Executor 
ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762635,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_4","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981762075,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981762085,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_7","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762637,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":8,"Memory Size":28000128,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981762069,"Completion Time":1422981762637,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at <console>:17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line11.$read$$iwC$$iwC.<init>(<console>:24)\n$line11.$read$$iwC.<init>(<console>:26)\n$line11.$read.<init>(<console>:28)\n$line11.$read$.<init>(<console>:32)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[1,2]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} 
+{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764001,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":138000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":106000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch 
Time":1422981763580,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1645,"Shuffle Write Time":99000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":123000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":108000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764004,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1647,"Shuffle Write Time":97000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764004,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":132000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764005,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":81000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981763578,"Completion Time":1422981764005,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at <console>:17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line11.$read$$iwC$$iwC.<init>(<console>:24)\n$line11.$read$$iwC.<init>(<console>:26)\n$line11.$read.<init>(<console>:28)\n$line11.$read$.<init>(<console>:32)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981764014,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981764014,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":28,"Result Size":1013,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at <console>:17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line11.$read$$iwC$$iwC.<init>(<console>:24)\n$line11.$read$$iwC.<init>(<console>:26)\n$line11.$read.<init>(<console>:28)\n$line11.$read$.<init>(<console>:32)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981764014,"Completion Time":1422981764045,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":2,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use 
Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at <console>:19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line13.$read$$iwC$$iwC$$iwC.<init>(<console>:24)\n$line13.$read$$iwC$$iwC.<init>(<console>:26)\n$line13.$read$$iwC.<init>(<console>:28)\n$line13.$read.<init>(<console>:30)\n$line13.$read$.<init>(<console>:34)\n$line13.$read$.<clinit>(<console>)\n$line13.$eval$.<init>(<console>:7)\n$line13.$eval$.<clinit>(<console>)\n$line13.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[3,4]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at <console>:19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use 
Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line13.$read$$iwC$$iwC$$iwC.<init>(<console>:24)\n$line13.$read$$iwC$$iwC.<init>(<console>:26)\n$line13.$read$$iwC.<init>(<console>:28)\n$line13.$read.<init>(<console>:30)\n$line13.$read$.<init>(<console>:34)\n$line13.$read$.<clinit>(<console>)\n$line13.$eval$.<init>(<console>:7)\n$line13.$eval$.<clinit>(<console>)\n$line13.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":4,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1422981764398,"Executor 
ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1422981764399,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1422981764399,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764642,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":4,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1422981764398,"Executor 
ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764648,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":247,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at <console>:19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line13.$read$$iwC$$iwC$$iwC.<init>(<console>:24)\n$line13.$read$$iwC$$iwC.<init>(<console>:26)\n$line13.$read$$iwC.<init>(<console>:28)\n$line13.$read.<init>(<console>:30)\n$line13.$read$.<init>(<console>:34)\n$line13.$read$.<clinit>(<console>)\n$line13.$eval$.<init>(<console>:7)\n$line13.$eval$.<clinit>(<console>)\n$line13.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981764396,"Completion Time":1422981764648,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":3,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at <console>:17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line15.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line15.$read$$iwC$$iwC.<init>(<console>:24)\n$line15.$read$$iwC.<init>(<console>:26)\n$line15.$read.<init>(<console>:28)\n$line15.$read$.<init>(<console>:32)\n$line15.$read$.<clinit>(<console>)\n$line15.$eval$.<init>(<console>:7)\n$line15.$eval$.<clinit>(<console>)\n$line15.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[5]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at <console>:17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory 
Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line15.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line15.$read$$iwC$$iwC.<init>(<console>:24)\n$line15.$read$$iwC.<init>(<console>:26)\n$line15.$read.<init>(<console>:28)\n$line15.$read$.<init>(<console>:32)\n$line15.$read$.<clinit>(<console>)\n$line15.$eval$.<init>(<console>:7)\n$line15.$eval$.<clinit>(<console>)\n$line15.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting 
Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":7,"Attempt":0,"Launch Time":1422981765029,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes 
Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":32,"Index":7,"Attempt":0,"Launch Time":1422981765029,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":14,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765049,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at <console>:17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line15.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line15.$read$$iwC$$iwC.<init>(<console>:24)\n$line15.$read$$iwC.<init>(<console>:26)\n$line15.$read.<init>(<console>:28)\n$line15.$read$.<init>(<console>:32)\n$line15.$read$.<clinit>(<console>)\n$line15.$eval$.<init>(<console>:7)\n$line15.$eval$.<clinit>(<console>)\n$line15.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981765026,"Completion Time":1422981765050,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":3,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1422981766912} diff --git a/core/src/test/resources/spark-events/local-1422981759269/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1422981759269/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1422981780767/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1422981780767/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 new file mode 100755 index 0000000000000..f14a000bf2c28 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 @@ -0,0 +1,82 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"<driver>","Host":"localhost","Port":57971},"Maximum Memory":278302556,"Timestamp":1422981780906} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","Java Version":"1.7.0_67 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.103","spark.eventLog.enabled":"true","spark.driver.port":"57969","spark.repl.class.uri":"http://192.168.1.103:57968","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.103:57970","spark.tachyonStore.folderName":"spark-3f19daee-844c-41d0-a3fc-5e3e508f9731","spark.app.id":"local-1422981780767"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.7","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.7","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"24.65-b04","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.7.0_67-b01","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"51.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","java.version":"1.7.0_67","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1422981780767","Timestamp":1422981779720,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981784234,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981784243,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784812,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":543,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_3","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784814,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result 
Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981784234,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784816,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981784243,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784816,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":543,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_7","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784817,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":541,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_5","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784817,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_4","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784818,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":543,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_6","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784818,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_2","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":8,"Memory Size":28000128,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981784228,"Completion Time":1422981784819,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use 
Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at <console>:20","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:25)\n$line11.$read$$iwC$$iwC.<init>(<console>:27)\n$line11.$read$$iwC.<init>(<console>:29)\n$line11.$read.<init>(<console>:31)\n$line11.$read$.<init>(<console>:35)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[1,2]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number 
of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981785829,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981785832,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result 
Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981785833,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981785832,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786282,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":88000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981785829,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786283,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":435,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":94000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981785833,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786283,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":435,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":79000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786283,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":73000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch 
Time":1422981785830,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786284,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1647,"Shuffle Write Time":83000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786284,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":436,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":98000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786285,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1645,"Shuffle Write Time":101000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786286,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":76000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981785829,"Completion Time":1422981786286,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at <console>:20","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:25)\n$line11.$read$$iwC$$iwC.<init>(<console>:27)\n$line11.$read$$iwC.<init>(<console>:29)\n$line11.$read.<init>(<console>:31)\n$line11.$read$.<init>(<console>:35)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981786296,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":2,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":3,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":4,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":5,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":6,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":7,"Attempt":0,"Launch Time":1422981786299,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":4,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786337,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":34,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":5,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786339,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1422981786297,"Executor 
ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":1,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":2,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":34,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981786296,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":7,"Attempt":0,"Launch Time":1422981786299,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786341,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":6,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786342,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":34,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"ExceptionFailure","Class Name":"java.lang.RuntimeException","Description":"got a 3, failing","Stack 
Trace":[{"Declaring Class":"$line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1","Method Name":"apply","File Name":"<console>","Line Number":18},{"Declaring Class":"$line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1","Method Name":"apply","File Name":"<console>","Line Number":17},{"Declaring Class":"scala.collection.Iterator$$anon$11","Method Name":"next","File Name":"Iterator.scala","Line Number":328},{"Declaring Class":"org.apache.spark.util.Utils$","Method Name":"getIteratorSize","File Name":"Utils.scala","Line Number":1311},{"Declaring Class":"org.apache.spark.rdd.RDD$$anonfun$count$1","Method Name":"apply","File Name":"RDD.scala","Line Number":910},{"Declaring Class":"org.apache.spark.rdd.RDD$$anonfun$count$1","Method Name":"apply","File Name":"RDD.scala","Line Number":910},{"Declaring Class":"org.apache.spark.SparkContext$$anonfun$runJob$4","Method Name":"apply","File Name":"SparkContext.scala","Line Number":1314},{"Declaring Class":"org.apache.spark.SparkContext$$anonfun$runJob$4","Method Name":"apply","File Name":"SparkContext.scala","Line Number":1314},{"Declaring Class":"org.apache.spark.scheduler.ResultTask","Method Name":"runTask","File Name":"ResultTask.scala","Line Number":61},{"Declaring Class":"org.apache.spark.scheduler.Task","Method Name":"run","File Name":"Task.scala","Line Number":56},{"Declaring Class":"org.apache.spark.executor.Executor$TaskRunner","Method Name":"run","File Name":"Executor.scala","Line Number":196},{"Declaring Class":"java.util.concurrent.ThreadPoolExecutor","Method Name":"runWorker","File Name":"ThreadPoolExecutor.java","Line Number":1145},{"Declaring Class":"java.util.concurrent.ThreadPoolExecutor$Worker","Method Name":"run","File Name":"ThreadPoolExecutor.java","Line Number":615},{"Declaring Class":"java.lang.Thread","Method Name":"run","File Name":"Thread.java","Line Number":745}],"Full Stack Trace":"java.lang.RuntimeException: got a 3, failing\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:18)\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:17)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1311)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:56)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\n","Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":36,"Result Size":0,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}},"Task Info":{"Task ID":19,"Index":3,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786343,"Failed":true,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize 
Time":0,"Executor Run Time":36,"Result Size":0,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at <console>:20","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:25)\n$line11.$read$$iwC$$iwC.<init>(<console>:27)\n$line11.$read$$iwC.<init>(<console>:29)\n$line11.$read.<init>(<console>:31)\n$line11.$read$.<init>(<console>:35)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981786296,"Completion Time":1422981786347,"Failure Reason":"Job aborted due to stage failure: Task 3 in stage 2.0 failed 1 times, most recent failure: Lost task 3.0 in stage 2.0 (TID 19, localhost): java.lang.RuntimeException: got a 3, failing\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:18)\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:17)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1311)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:56)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\n\nDriver stacktrace:","Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Job Result":{"Result":"JobFailed","Exception":{"Message":"Job aborted due to stage failure: Task 3 in stage 2.0 failed 1 times, most recent failure: Lost task 3.0 in stage 2.0 (TID 19, localhost): 
java.lang.RuntimeException: got a 3, failing\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:18)\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:17)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1311)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:56)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\n\nDriver stacktrace:","Stack Trace":[{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler","Method Name":"org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages","File Name":"DAGScheduler.scala","Line Number":1214},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":1203},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":1202},{"Declaring Class":"scala.collection.mutable.ResizableArray$class","Method Name":"foreach","File Name":"ResizableArray.scala","Line Number":59},{"Declaring Class":"scala.collection.mutable.ArrayBuffer","Method Name":"foreach","File Name":"ArrayBuffer.scala","Line Number":47},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler","Method Name":"abortStage","File Name":"DAGScheduler.scala","Line Number":1202},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":696},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":696},{"Declaring Class":"scala.Option","Method Name":"foreach","File Name":"Option.scala","Line Number":236},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler","Method Name":"handleTaskSetFailed","File Name":"DAGScheduler.scala","Line Number":696},{"Declaring Class":"org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2","Method Name":"applyOrElse","File Name":"DAGScheduler.scala","Line Number":1420},{"Declaring Class":"akka.actor.Actor$class","Method Name":"aroundReceive","File Name":"Actor.scala","Line Number":465},{"Declaring Class":"org.apache.spark.scheduler.DAGSchedulerEventProcessActor","Method Name":"aroundReceive","File Name":"DAGScheduler.scala","Line Number":1375},{"Declaring Class":"akka.actor.ActorCell","Method Name":"receiveMessage","File Name":"ActorCell.scala","Line Number":516},{"Declaring Class":"akka.actor.ActorCell","Method Name":"invoke","File Name":"ActorCell.scala","Line Number":487},{"Declaring Class":"akka.dispatch.Mailbox","Method Name":"processMailbox","File Name":"Mailbox.scala","Line Number":238},{"Declaring Class":"akka.dispatch.Mailbox","Method Name":"run","File Name":"Mailbox.scala","Line Number":220},{"Declaring 
Class":"akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask","Method Name":"exec","File Name":"AbstractDispatcher.scala","Line Number":393},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinTask","Method Name":"doExec","File Name":"ForkJoinTask.java","Line Number":260},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinPool$WorkQueue","Method Name":"runTask","File Name":"ForkJoinPool.java","Line Number":1339},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinPool","Method Name":"runWorker","File Name":"ForkJoinPool.java","Line Number":1979},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinWorkerThread","Method Name":"run","File Name":"ForkJoinWorkerThread.java","Line Number":107}]}}} +{"Event":"SparkListenerJobStart","Job ID":2,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at <console>:17","Number of Tasks":8,"RDD Info":[{"RDD ID":6,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line19.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line19.$read$$iwC$$iwC.<init>(<console>:24)\n$line19.$read$$iwC.<init>(<console>:26)\n$line19.$read.<init>(<console>:28)\n$line19.$read$.<init>(<console>:32)\n$line19.$read$.<clinit>(<console>)\n$line19.$eval$.<init>(<console>:7)\n$line19.$eval$.<clinit>(<console>)\n$line19.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[3]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at <console>:17","Number of Tasks":8,"RDD Info":[{"RDD ID":6,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line19.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line19.$read$$iwC$$iwC.<init>(<console>:24)\n$line19.$read$$iwC.<init>(<console>:26)\n$line19.$read.<init>(<console>:28)\n$line19.$read$.<init>(<console>:32)\n$line19.$read$.<clinit>(<console>)\n$line19.$eval$.<init>(<console>:7)\n$line19.$eval$.<clinit>(<console>)\n$line19.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":0,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":1,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":2,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":3,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":4,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":5,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":6,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":7,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting 
Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":1,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787222,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":20,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":0,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787223,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":20,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":2,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787223,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":22,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":7,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787223,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":19,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":5,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787224,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":19,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":3,"Attempt":0,"Launch Time":1422981787192,"Executor 
ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787224,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":19,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":4,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787225,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":22,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":6,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787225,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":21,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at <console>:17","Number of Tasks":8,"RDD Info":[{"RDD ID":6,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line19.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line19.$read$$iwC$$iwC.<init>(<console>:24)\n$line19.$read$$iwC.<init>(<console>:26)\n$line19.$read.<init>(<console>:28)\n$line19.$read$.<init>(<console>:32)\n$line19.$read$.<clinit>(<console>)\n$line19.$eval$.<init>(<console>:7)\n$line19.$eval$.<clinit>(<console>)\n$line19.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981787191,"Completion Time":1422981787226,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1422981788731} diff --git a/core/src/test/resources/spark-events/local-1422981780767/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1422981780767/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1425081759269/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1425081759269/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1 new file mode 100755 index 0000000000000..9745b36b09e44 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1 @@ -0,0 +1,88 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"<driver>","Host":"localhost","Port":57967},"Maximum Memory":278302556,"Timestamp":1425081759407} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","Java Version":"1.7.0_67 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.103","spark.eventLog.enabled":"true","spark.driver.port":"57965","spark.repl.class.uri":"http://192.168.1.103:57964","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.103:57966","spark.tachyonStore.folderName":"spark-fd6c823a-8a18-4113-8306-1fa7bb623a7f","spark.app.id":"local-1425081759269"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.7","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.7","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"24.65-b04","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.7.0_67-b01","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"51.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","java.version":"1.7.0_67","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1425081759269","Timestamp":1425081758277,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1425081762075,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1425081762082,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1425081762083,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1425081762085,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762632,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_6","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762633,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":520,"Result Size":1268,"JVM GC Time":20,"Result 
Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_2","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1425081762082,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_3","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_5","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1425081762083,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762635,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_4","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1425081762075,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1425081762085,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_7","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762637,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":8,"Memory Size":28000128,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081762069,"Completion Time":1425081762637,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use 
Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at <console>:17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line11.$read$$iwC$$iwC.<init>(<console>:24)\n$line11.$read$$iwC.<init>(<console>:26)\n$line11.$read.<init>(<console>:28)\n$line11.$read$.<init>(<console>:32)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[1,2]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of 
Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1425081763581,"Executor 
ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764001,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":138000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":106000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1645,"Shuffle Write Time":99000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":123000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host 
Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":108000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764004,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1647,"Shuffle Write Time":97000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764004,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":132000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764005,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":81000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081763578,"Completion Time":1425081764005,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at <console>:17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line11.$read$$iwC$$iwC.<init>(<console>:24)\n$line11.$read$$iwC.<init>(<console>:26)\n$line11.$read.<init>(<console>:28)\n$line11.$read$.<init>(<console>:32)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1425081764014,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1425081764014,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":28,"Result Size":1013,"JVM GC Time":0,"Result Serialization 
Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at <console>:17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line11.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line11.$read$$iwC$$iwC.<init>(<console>:24)\n$line11.$read$$iwC.<init>(<console>:26)\n$line11.$read.<init>(<console>:28)\n$line11.$read$.<init>(<console>:32)\n$line11.$read$.<clinit>(<console>)\n$line11.$eval$.<init>(<console>:7)\n$line11.$eval$.<clinit>(<console>)\n$line11.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081764014,"Completion Time":1425081764045,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":2,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"map at <console>:14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$line10.$read$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line10.$read$$iwC$$iwC.<init>(<console>:21)\n$line10.$read$$iwC.<init>(<console>:23)\n$line10.$read.<init>(<console>:25)\n$line10.$read$.<init>(<console>:29)\n$line10.$read$.<clinit>(<console>)\n$line10.$eval$.<init>(<console>:7)\n$line10.$eval$.<clinit>(<console>)\n$line10.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at <console>:19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line13.$read$$iwC$$iwC$$iwC.<init>(<console>:24)\n$line13.$read$$iwC$$iwC.<init>(<console>:26)\n$line13.$read$$iwC.<init>(<console>:28)\n$line13.$read.<init>(<console>:30)\n$line13.$read$.<init>(<console>:34)\n$line13.$read$.<clinit>(<console>)\n$line13.$eval$.<init>(<console>:7)\n$line13.$eval$.<clinit>(<console>)\n$line13.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[3,4]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at <console>:19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line13.$read$$iwC$$iwC$$iwC.<init>(<console>:24)\n$line13.$read$$iwC$$iwC.<init>(<console>:26)\n$line13.$read$$iwC.<init>(<console>:28)\n$line13.$read.<init>(<console>:30)\n$line13.$read$.<init>(<console>:34)\n$line13.$read$.<clinit>(<console>)\n$line13.$eval$.<init>(<console>:7)\n$line13.$eval$.<clinit>(<console>)\n$line13.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":4,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1425081764399,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1425081764399,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764642,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor 
Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":4,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor 
Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764648,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":247,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at <console>:19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:19)\n$line13.$read$$iwC$$iwC$$iwC.<init>(<console>:24)\n$line13.$read$$iwC$$iwC.<init>(<console>:26)\n$line13.$read$$iwC.<init>(<console>:28)\n$line13.$read.<init>(<console>:30)\n$line13.$read$.<init>(<console>:34)\n$line13.$read$.<clinit>(<console>)\n$line13.$eval$.<init>(<console>:7)\n$line13.$eval$.<clinit>(<console>)\n$line13.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081764396,"Completion Time":1425081764648,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Job 
Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":3,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at <console>:17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line15.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line15.$read$$iwC$$iwC.<init>(<console>:24)\n$line15.$read$$iwC.<init>(<console>:26)\n$line15.$read.<init>(<console>:28)\n$line15.$read$.<init>(<console>:32)\n$line15.$read$.<clinit>(<console>)\n$line15.$eval$.<init>(<console>:7)\n$line15.$eval$.<clinit>(<console>)\n$line15.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[5]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at <console>:17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line15.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line15.$read$$iwC$$iwC.<init>(<console>:24)\n$line15.$read$$iwC.<init>(<console>:26)\n$line15.$read.<init>(<console>:28)\n$line15.$read$.<init>(<console>:32)\n$line15.$read$.<clinit>(<console>)\n$line15.$eval$.<init>(<console>:7)\n$line15.$eval$.<clinit>(<console>)\n$line15.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":7,"Attempt":0,"Launch Time":1425081765029,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor 
Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":32,"Index":7,"Attempt":0,"Launch Time":1425081765029,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task 
Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":14,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765049,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at <console>:17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$line15.$read$$iwC$$iwC$$iwC.<init>(<console>:22)\n$line15.$read$$iwC$$iwC.<init>(<console>:24)\n$line15.$read$$iwC.<init>(<console>:26)\n$line15.$read.<init>(<console>:28)\n$line15.$read$.<init>(<console>:32)\n$line15.$read$.<clinit>(<console>)\n$line15.$eval$.<init>(<console>:7)\n$line15.$eval$.<clinit>(<console>)\n$line15.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081765026,"Completion Time":1425081765050,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":3,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1425081766912} diff --git a/core/src/test/resources/spark-events/local-1425081759269/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1425081759269/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git 
a/core/src/test/resources/spark-events/local-1426533911241/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1426533911241/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 new file mode 100755 index 0000000000000..9ef5bd5d92de5 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 @@ -0,0 +1,24 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"<driver>","Host":"localhost","Port":58610},"Maximum Memory":278019440,"Timestamp":1426533911361} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","Java Version":"1.8.0_25 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.105","spark.eventLog.enabled":"true","spark.driver.port":"58608","spark.repl.class.uri":"http://192.168.1.105:58607","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.105:58609","spark.tachyonStore.folderName":"spark-5e9b7f26-8e97-4b43-82d6-25c141530da9","spark.app.id":"local-1426533911241"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle Corporation","java.vm.specification.version":"1.8","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"25.25-b02","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_25-b17","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","java.specification.name":"Java Platform API 
Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","java.version":"1.8.0_25","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/etc/hadoop":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1426533911241","Timestamp":1426533910242,"User":"irashid","App Attempt ID":"1"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426533936515,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426533936521,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426533936522,"Executor 
ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426533936524,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936570,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"378","Value":"378"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936572,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"572","Value":"950"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"978","Value":"1928"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426533936521,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"247","Value":"2175"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor 
Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"678","Value":"2853"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"897","Value":"3750"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426533936524,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"1222","Value":"4972"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426533936515,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936576,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"78","Value":"5050"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1426533936103,"Completion Time":1426533936579,"Accumulables":[{"ID":1,"Name":"my counter","Value":"5050"}]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1426533945177} diff --git a/core/src/test/resources/spark-events/local-1426533911241/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1426533911241/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426633911242/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1426633911242/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 new file mode 100755 index 0000000000000..e7043282107d3 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 @@ -0,0 +1,24 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"<driver>","Host":"localhost","Port":58610},"Maximum Memory":278019440,"Timestamp":1426633911361} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","Java Version":"1.8.0_25 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.105","spark.eventLog.enabled":"true","spark.driver.port":"58608","spark.repl.class.uri":"http://192.168.1.105:58607","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.105:58609","spark.tachyonStore.folderName":"spark-5e9b7f26-8e97-4b43-82d6-25c141530da9","spark.app.id":"local-1426633911241"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.8","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"25.25-b02","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_25-b17","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","java.version":"1.8.0_25","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/etc/hadoop":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1426533911241","Timestamp":1426633910242,"User":"irashid","App Attempt ID":"2"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426633936515,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting 
Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426633936521,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426633936524,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936570,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"378","Value":"378"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936572,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"572","Value":"950"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"978","Value":"1928"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426633936521,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"247","Value":"2175"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"678","Value":"2853"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"897","Value":"3750"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426633936524,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"1222","Value":"4972"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426633936515,"Executor ID":"<driver>","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting 
Result Time":0,"Finish Time":1426633936576,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"78","Value":"5050"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at <console>:15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20)\n$line9.$read$$iwC$$iwC.<init>(<console>:22)\n$line9.$read$$iwC.<init>(<console>:24)\n$line9.$read.<init>(<console>:26)\n$line9.$read$.<init>(<console>:30)\n$line9.$read$.<clinit>(<console>)\n$line9.$eval$.<init>(<console>:7)\n$line9.$eval$.<clinit>(<console>)\n$line9.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1426633936103,"Completion Time":1426633936579,"Accumulables":[{"ID":1,"Name":"my counter","Value":"5050"}]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1426633945177} diff --git a/core/src/test/resources/spark-events/local-1426633911242/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1426633911242/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1430917381534 b/core/src/test/resources/spark-events/local-1430917381534 new file mode 100755 index 0000000000000..24ac478073e3d --- /dev/null +++ b/core/src/test/resources/spark-events/local-1430917381534 @@ -0,0 +1,231 @@ +{"Event":"SparkListenerLogStart","Spark Version":"1.4.0-SNAPSHOT"} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"driver","Host":"localhost","Port":61103},"Maximum Memory":278019440,"Timestamp":1430917381651} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","Java Version":"1.8.0_25 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.102","spark.eventLog.enabled":"true","spark.driver.port":"61101","spark.repl.class.uri":"http://192.168.1.102:61100","spark.jars":"","spark.app.name":"Spark 
shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.eventLog.dir":"/Users/irashid/github/kraps/core/src/test/resources/spark-events","spark.fileserver.uri":"http://192.168.1.102:61102","spark.tachyonStore.folderName":"spark-aaaf41b3-d1dd-447f-8951-acf51490758b","spark.app.id":"local-1430917381534"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle Corporation","java.vm.specification.version":"1.8","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/github/spark","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"25.25-b02","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_25-b17","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","scala.usejavacp":"true","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --conf spark.eventLog.enabled=true --conf 
spark.eventLog.dir=/Users/irashid/github/kraps/core/src/test/resources/spark-events --class org.apache.spark.repl.Main spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","java.version":"1.8.0_25","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/etc/hadoop":"System Classpath","/Users/irashid/github/spark/lib_managed/jars/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/github/spark/conf/":"System Classpath","/Users/irashid/github/spark/assembly/target/scala-2.10/spark-assembly-1.4.0-SNAPSHOT-hadoop2.5.0.jar":"System Classpath","/Users/irashid/github/spark/lib_managed/jars/datanucleus-core-3.2.10.jar":"System Classpath","/Users/irashid/github/spark/lib_managed/jars/datanucleus-api-jdo-3.2.6.jar":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1430917381534","Timestamp":1430917380893,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Submission Time":1430917386422,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"map at <console>:26","Number of Tasks":100,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":1,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:287)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)\n$line20.$read$$iwC$$iwC$$iwC.<init>(<console>:39)\n$line20.$read$$iwC$$iwC.<init>(<console>:41)\n$line20.$read$$iwC.<init>(<console>:43)\n$line20.$read.<init>(<console>:45)\n$line20.$read$.<init>(<console>:49)\n$line20.$read$.<clinit>(<console>)\n$line20.$eval$.<init>(<console>:7)\n$line20.$eval$.<clinit>(<console>)\n$line20.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]},{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"collect at <console>:26","Number of Tasks":10,"RDD Info":[{"RDD ID":4,"Name":"4","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of 
Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:819)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)\n$line20.$read$$iwC$$iwC$$iwC.<init>(<console>:39)\n$line20.$read$$iwC$$iwC.<init>(<console>:41)\n$line20.$read$$iwC.<init>(<console>:43)\n$line20.$read.<init>(<console>:45)\n$line20.$read$.<init>(<console>:49)\n$line20.$read$.<clinit>(<console>)\n$line20.$eval$.<init>(<console>:7)\n$line20.$eval$.<clinit>(<console>)\n$line20.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]}],"Stage IDs":[0,1],"Properties":{}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"map at <console>:26","Number of Tasks":100,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":1,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:287)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)\n$line20.$read$$iwC$$iwC$$iwC.<init>(<console>:39)\n$line20.$read$$iwC$$iwC.<init>(<console>:41)\n$line20.$read$$iwC.<init>(<console>:43)\n$line20.$read.<init>(<console>:45)\n$line20.$read$.<init>(<console>:49)\n$line20.$read$.<clinit>(<console>)\n$line20.$eval$.<init>(<console>:7)\n$line20.$eval$.<clinit>(<console>)\n$line20.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]},"Properties":{}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1430917386494,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1430917386502,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1430917386503,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1430917386506,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":8,"Attempt":0,"Launch Time":1430917386914,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":9,"Attempt":0,"Launch Time":1430917386915,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":10,"Attempt":0,"Launch 
Time":1430917386916,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":11,"Attempt":0,"Launch Time":1430917386918,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386919,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":30,"Executor Run Time":350,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":3675510,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1430917386503,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386922,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":32,"Executor Run Time":348,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89885,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1430917386502,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386923,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":350,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":3934399,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386923,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write 
Time":83022,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":12,"Attempt":0,"Launch Time":1430917386923,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387016,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386924,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":29,"Executor Run Time":351,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":4016617,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":13,"Attempt":0,"Launch Time":1430917386924,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387062,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":14,"Attempt":0,"Launch Time":1430917386925,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387019,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386927,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":1311694,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":15,"Attempt":0,"Launch Time":1430917386928,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387011,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1430917386494,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386929,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":32,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization 
Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":3842811,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":49294,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1430917386506,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386929,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":2579051,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":16,"Attempt":0,"Launch Time":1430917387001,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387099,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":8,"Attempt":0,"Launch Time":1430917386914,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":80,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":121551,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":17,"Attempt":0,"Launch Time":1430917387005,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387128,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":11,"Attempt":0,"Launch Time":1430917386918,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387007,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":75,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94507,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":18,"Attempt":0,"Launch Time":1430917387010,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1430917387115,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":15,"Attempt":0,"Launch Time":1430917386928,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387011,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":76,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":602780,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":19,"Attempt":0,"Launch Time":1430917387012,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387106,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":20,"Attempt":0,"Launch Time":1430917387014,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387104,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":21,"Attempt":0,"Launch Time":1430917387015,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387111,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":10,"Attempt":0,"Launch Time":1430917386916,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387015,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":8,"Executor Run Time":73,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94709,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":9,"Attempt":0,"Launch Time":1430917386915,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387016,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":84,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":101664,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60489,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":12,"Index":12,"Attempt":0,"Launch Time":1430917386923,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387016,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":77,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102476,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":22,"Attempt":0,"Launch Time":1430917387018,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387119,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":14,"Attempt":0,"Launch Time":1430917386925,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387019,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":83,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95646,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":23,"Attempt":0,"Launch Time":1430917387031,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":13,"Attempt":0,"Launch Time":1430917386924,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387062,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":76,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95004,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":24,"Attempt":0,"Launch Time":1430917387098,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":16,"Attempt":0,"Launch Time":1430917387001,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1430917387099,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":84,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":108320,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":25,"Attempt":0,"Launch Time":1430917387103,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":20,"Attempt":0,"Launch Time":1430917387014,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387104,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":83,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97716,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":26,"Attempt":0,"Launch Time":1430917387105,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":19,"Attempt":0,"Launch Time":1430917387012,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387106,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":84,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95788,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":27,"Attempt":0,"Launch Time":1430917387110,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":21,"Attempt":0,"Launch Time":1430917387015,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387111,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":88,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk 
Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100270,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":28,"Attempt":0,"Launch Time":1430917387113,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":29,"Attempt":0,"Launch Time":1430917387114,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":23,"Attempt":0,"Launch Time":1430917387031,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":65,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":91844,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":18,"Attempt":0,"Launch Time":1430917387010,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":92,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100836,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":30,"Attempt":0,"Launch Time":1430917387118,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":22,"Attempt":0,"Launch Time":1430917387018,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387119,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":93,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":143427,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} 
+{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":31,"Attempt":0,"Launch Time":1430917387127,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":17,"Attempt":0,"Launch Time":1430917387005,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387128,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":91,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":99944,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":32,"Attempt":0,"Launch Time":1430917387148,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":33,"Index":33,"Attempt":0,"Launch Time":1430917387149,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":24,"Attempt":0,"Launch Time":1430917387098,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387150,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":43,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":157194,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":27,"Attempt":0,"Launch Time":1430917387110,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387151,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102019,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":34,"Index":34,"Attempt":0,"Launch Time":1430917387156,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting 
Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":26,"Attempt":0,"Launch Time":1430917387105,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387157,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":108213,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":35,"Index":35,"Attempt":0,"Launch Time":1430917387161,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":28,"Attempt":0,"Launch Time":1430917387113,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387162,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":29,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":104299,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":36,"Index":36,"Attempt":0,"Launch Time":1430917387164,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":25,"Attempt":0,"Launch Time":1430917387103,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387164,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":49,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94134,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":37,"Index":37,"Attempt":0,"Launch Time":1430917387165,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":38,"Index":38,"Attempt":0,"Launch Time":1430917387166,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":29,"Attempt":0,"Launch Time":1430917387114,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387166,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":39,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":114938,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":39,"Index":39,"Attempt":0,"Launch Time":1430917387180,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":30,"Attempt":0,"Launch Time":1430917387118,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387180,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":34,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":119770,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":32,"Index":32,"Attempt":0,"Launch Time":1430917387148,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387181,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89603,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":40,"Index":40,"Attempt":0,"Launch Time":1430917387197,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":34,"Index":34,"Attempt":0,"Launch Time":1430917387156,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387198,"Failed":false,"Accumulables":[]},"Task 
Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":27,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":127746,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":41,"Index":41,"Attempt":0,"Launch Time":1430917387200,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":31,"Attempt":0,"Launch Time":1430917387127,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387201,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":36,"Executor Run Time":24,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92619,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":42,"Index":42,"Attempt":0,"Launch Time":1430917387203,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":36,"Index":36,"Attempt":0,"Launch Time":1430917387164,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387204,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":29,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":123855,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":43,"Index":43,"Attempt":0,"Launch Time":1430917387204,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":44,"Index":44,"Attempt":0,"Launch Time":1430917387205,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":45,"Index":45,"Attempt":0,"Launch Time":1430917387206,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":37,"Index":37,"Attempt":0,"Launch Time":1430917387165,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387207,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":111869,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":33,"Index":33,"Attempt":0,"Launch Time":1430917387149,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387207,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":43,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":118329,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":46,"Index":46,"Attempt":0,"Launch Time":1430917387210,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":35,"Index":35,"Attempt":0,"Launch Time":1430917387161,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387211,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":35,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":160963,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":47,"Index":47,"Attempt":0,"Launch Time":1430917387212,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":39,"Index":39,"Attempt":0,"Launch Time":1430917387180,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387212,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory 
Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98748,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":38,"Index":38,"Attempt":0,"Launch Time":1430917387166,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387213,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":131158,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":48,"Index":48,"Attempt":0,"Launch Time":1430917387220,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":40,"Index":40,"Attempt":0,"Launch Time":1430917387197,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387221,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":14,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94792,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":49,"Index":49,"Attempt":0,"Launch Time":1430917387223,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":41,"Index":41,"Attempt":0,"Launch Time":1430917387200,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387224,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90765,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":50,"Index":50,"Attempt":0,"Launch Time":1430917387240,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":44,"Index":44,"Attempt":0,"Launch Time":1430917387205,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387242,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98293,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":51,"Index":51,"Attempt":0,"Launch Time":1430917387242,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":52,"Index":52,"Attempt":0,"Launch Time":1430917387243,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":43,"Index":43,"Attempt":0,"Launch Time":1430917387204,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387243,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":171516,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":45,"Index":45,"Attempt":0,"Launch Time":1430917387206,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387243,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92985,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":53,"Index":53,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":54,"Index":54,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting 
Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":42,"Index":42,"Attempt":0,"Launch Time":1430917387203,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387245,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103713,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":47,"Index":47,"Attempt":0,"Launch Time":1430917387212,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387245,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103015,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":55,"Index":55,"Attempt":0,"Launch Time":1430917387246,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":56,"Index":56,"Attempt":0,"Launch Time":1430917387249,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":48,"Index":48,"Attempt":0,"Launch Time":1430917387220,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387250,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":24,"Result Size":2065,"JVM GC Time":6,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":139844,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":46,"Index":46,"Attempt":0,"Launch Time":1430917387210,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387253,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":31,"Result 
Size":2065,"JVM GC Time":6,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":113322,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":57,"Index":57,"Attempt":0,"Launch Time":1430917387257,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":49,"Index":49,"Attempt":0,"Launch Time":1430917387223,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387257,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":7,"Executor Run Time":23,"Result Size":2065,"JVM GC Time":6,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94984,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":58,"Index":58,"Attempt":0,"Launch Time":1430917387263,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":51,"Index":51,"Attempt":0,"Launch Time":1430917387242,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387263,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":96013,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":59,"Index":59,"Attempt":0,"Launch Time":1430917387265,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":50,"Index":50,"Attempt":0,"Launch Time":1430917387240,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387266,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90836,"Shuffle Records Written":10},"Input Metrics":{"Data Read 
Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":60,"Index":60,"Attempt":0,"Launch Time":1430917387269,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":61,"Index":61,"Attempt":0,"Launch Time":1430917387270,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":52,"Index":52,"Attempt":0,"Launch Time":1430917387243,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387271,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89664,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":62,"Index":62,"Attempt":0,"Launch Time":1430917387272,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":53,"Index":53,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387273,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92835,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":63,"Index":63,"Attempt":0,"Launch Time":1430917387276,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":55,"Index":55,"Attempt":0,"Launch Time":1430917387246,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387276,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":21,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write 
Time":108309,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":64,"Index":64,"Attempt":0,"Launch Time":1430917387277,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":57,"Index":57,"Attempt":0,"Launch Time":1430917387257,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387278,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":96849,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":65,"Index":65,"Attempt":0,"Launch Time":1430917387279,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":56,"Index":56,"Attempt":0,"Launch Time":1430917387249,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387280,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":20,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90329,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":66,"Index":66,"Attempt":0,"Launch Time":1430917387285,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":58,"Index":58,"Attempt":0,"Launch Time":1430917387263,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387286,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97521,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task 
ID":67,"Index":67,"Attempt":0,"Launch Time":1430917387288,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":59,"Index":59,"Attempt":0,"Launch Time":1430917387265,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387288,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100753,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":54,"Index":54,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387303,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90506,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":68,"Index":68,"Attempt":0,"Launch Time":1430917387306,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":60,"Index":60,"Attempt":0,"Launch Time":1430917387269,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387307,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":8,"Executor Run Time":28,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":156916,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":69,"Index":69,"Attempt":0,"Launch Time":1430917387308,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":62,"Index":62,"Attempt":0,"Launch Time":1430917387272,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1430917387309,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100396,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":70,"Index":70,"Attempt":0,"Launch Time":1430917387311,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":64,"Index":64,"Attempt":0,"Launch Time":1430917387277,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387311,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":25,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":104087,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":71,"Index":71,"Attempt":0,"Launch Time":1430917387313,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":72,"Index":72,"Attempt":0,"Launch Time":1430917387314,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":61,"Index":61,"Attempt":0,"Launch Time":1430917387270,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387315,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":36,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100426,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":73,"Index":73,"Attempt":0,"Launch Time":1430917387316,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":63,"Index":63,"Attempt":0,"Launch Time":1430917387276,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387316,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":20,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102779,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":74,"Index":74,"Attempt":0,"Launch Time":1430917387318,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":66,"Index":66,"Attempt":0,"Launch Time":1430917387285,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387319,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":27,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100823,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":65,"Index":65,"Attempt":0,"Launch Time":1430917387279,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387321,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":27,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":99123,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":75,"Index":75,"Attempt":0,"Launch Time":1430917387323,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":67,"Index":67,"Attempt":0,"Launch Time":1430917387288,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387323,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92741,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records 
Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":76,"Index":76,"Attempt":0,"Launch Time":1430917387327,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":68,"Index":68,"Attempt":0,"Launch Time":1430917387306,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387328,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":101750,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":77,"Index":77,"Attempt":0,"Launch Time":1430917387329,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":69,"Index":69,"Attempt":0,"Launch Time":1430917387308,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387331,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89437,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":78,"Index":78,"Attempt":0,"Launch Time":1430917387338,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":71,"Index":71,"Attempt":0,"Launch Time":1430917387313,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387339,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":183323,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":79,"Index":79,"Attempt":0,"Launch Time":1430917387339,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":70,"Index":70,"Attempt":0,"Launch Time":1430917387311,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":24,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":105933,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":80,"Index":80,"Attempt":0,"Launch Time":1430917387341,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":74,"Index":74,"Attempt":0,"Launch Time":1430917387318,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387341,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98047,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":81,"Index":81,"Attempt":0,"Launch Time":1430917387342,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":72,"Index":72,"Attempt":0,"Launch Time":1430917387314,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387343,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":139584,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":82,"Index":82,"Attempt":0,"Launch Time":1430917387349,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task 
Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":75,"Index":75,"Attempt":0,"Launch Time":1430917387323,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387359,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":20,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":134733,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":83,"Index":83,"Attempt":0,"Launch Time":1430917387360,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":73,"Index":73,"Attempt":0,"Launch Time":1430917387316,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387360,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":16,"Executor Run Time":25,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":197728,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":84,"Index":84,"Attempt":0,"Launch Time":1430917387368,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":77,"Index":77,"Attempt":0,"Launch Time":1430917387329,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387368,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":30,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":104879,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":85,"Index":85,"Attempt":0,"Launch Time":1430917387371,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":76,"Index":76,"Attempt":0,"Launch Time":1430917387327,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result 
Time":0,"Finish Time":1430917387371,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":117198,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":86,"Index":86,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":87,"Index":87,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":79,"Index":79,"Attempt":0,"Launch Time":1430917387339,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387375,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":28,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":760569,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":80,"Index":80,"Attempt":0,"Launch Time":1430917387341,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387375,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98069,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":88,"Index":88,"Attempt":0,"Launch Time":1430917387376,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":78,"Index":78,"Attempt":0,"Launch Time":1430917387338,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387377,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes 
Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94029,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":89,"Index":89,"Attempt":0,"Launch Time":1430917387384,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":82,"Index":82,"Attempt":0,"Launch Time":1430917387349,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387385,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":23,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103837,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":90,"Index":90,"Attempt":0,"Launch Time":1430917387385,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":81,"Index":81,"Attempt":0,"Launch Time":1430917387342,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387386,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102935,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":91,"Index":91,"Attempt":0,"Launch Time":1430917387394,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":84,"Index":84,"Attempt":0,"Launch Time":1430917387368,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387395,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97176,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} 
+{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":92,"Index":92,"Attempt":0,"Launch Time":1430917387400,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":85,"Index":85,"Attempt":0,"Launch Time":1430917387371,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387401,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103743,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":93,"Index":93,"Attempt":0,"Launch Time":1430917387402,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":86,"Index":86,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387402,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95848,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":94,"Index":94,"Attempt":0,"Launch Time":1430917387404,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":83,"Index":83,"Attempt":0,"Launch Time":1430917387360,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387405,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":37,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":134049,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":95,"Index":95,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting 
Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":96,"Index":96,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":97,"Index":97,"Attempt":0,"Launch Time":1430917387407,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":88,"Index":88,"Attempt":0,"Launch Time":1430917387376,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387407,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94269,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":90,"Index":90,"Attempt":0,"Launch Time":1430917387385,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387408,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98472,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":98,"Index":98,"Attempt":0,"Launch Time":1430917387410,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":87,"Index":87,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387410,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102159,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":89,"Index":89,"Attempt":0,"Launch Time":1430917387384,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387417,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97226,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":99,"Index":99,"Attempt":0,"Launch Time":1430917387426,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":91,"Index":91,"Attempt":0,"Launch Time":1430917387394,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387427,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":8,"Executor Run Time":21,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98301,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":94,"Index":94,"Attempt":0,"Launch Time":1430917387404,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387441,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102455,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":92,"Index":92,"Attempt":0,"Launch Time":1430917387400,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387442,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":114766,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":96,"Index":96,"Attempt":0,"Launch Time":1430917387406,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387443,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":26,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100678,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":93,"Index":93,"Attempt":0,"Launch Time":1430917387402,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387443,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95818,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":95,"Index":95,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387444,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":28,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":117803,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":98,"Index":98,"Attempt":0,"Launch Time":1430917387410,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387444,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":7,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":105039,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":99,"Index":99,"Attempt":0,"Launch Time":1430917387426,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387448,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle 
Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":133964,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70565,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":97,"Index":97,"Attempt":0,"Launch Time":1430917387407,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387450,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":131987,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"map at <console>:26","Number of Tasks":100,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":1,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:287)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)\n$line20.$read$$iwC$$iwC$$iwC.<init>(<console>:39)\n$line20.$read$$iwC$$iwC.<init>(<console>:41)\n$line20.$read$$iwC.<init>(<console>:43)\n$line20.$read.<init>(<console>:45)\n$line20.$read$.<init>(<console>:49)\n$line20.$read$.<clinit>(<console>)\n$line20.$eval$.<init>(<console>:7)\n$line20.$eval$.<clinit>(<console>)\n$line20.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Submission Time":1430917386485,"Completion Time":1430917387451,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"collect at <console>:26","Number of Tasks":10,"RDD Info":[{"RDD 
ID":4,"Name":"4","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:819)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)\n$line20.$read$$iwC$$iwC$$iwC.<init>(<console>:39)\n$line20.$read$$iwC$$iwC.<init>(<console>:41)\n$line20.$read$$iwC.<init>(<console>:43)\n$line20.$read.<init>(<console>:45)\n$line20.$read$.<init>(<console>:49)\n$line20.$read$.<clinit>(<console>)\n$line20.$eval$.<init>(<console>:7)\n$line20.$eval$.<clinit>(<console>)\n$line20.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]},"Properties":{}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":100,"Index":0,"Attempt":0,"Launch Time":1430917387462,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":101,"Index":1,"Attempt":0,"Launch Time":1430917387464,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":102,"Index":2,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":103,"Index":3,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":104,"Index":4,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":105,"Index":5,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":106,"Index":6,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} 
+{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":107,"Index":7,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":108,"Index":8,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":103,"Index":3,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387546,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":75,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":109,"Index":9,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":105,"Index":5,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387547,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":76,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":100,"Index":0,"Attempt":0,"Launch Time":1430917387462,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387548,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":74,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":107,"Index":7,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1430917387548,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":76,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":101,"Index":1,"Attempt":0,"Launch Time":1430917387464,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387548,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":76,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":1,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":106,"Index":6,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387549,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":75,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":104,"Index":4,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387549,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":74,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":1,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":102,"Index":2,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387550,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":79,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task 
Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":108,"Index":8,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387583,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":31,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":109,"Index":9,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387585,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":30,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":1,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"collect at <console>:26","Number of Tasks":10,"RDD Info":[{"RDD ID":4,"Name":"4","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:819)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)\n$line20.$read$$iwC$$iwC$$iwC.<init>(<console>:39)\n$line20.$read$$iwC$$iwC.<init>(<console>:41)\n$line20.$read$$iwC.<init>(<console>:43)\n$line20.$read.<init>(<console>:45)\n$line20.$read$.<init>(<console>:49)\n$line20.$read$.<clinit>(<console>)\n$line20.$eval$.<init>(<console>:7)\n$line20.$eval$.<clinit>(<console>)\n$line20.$eval.$print(<console>)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Submission Time":1430917387462,"Completion Time":1430917387585,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Completion Time":1430917387589,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1430917391398} diff --git a/core/src/test/scala/org/apache/spark/JsonTestUtils.scala b/core/src/test/scala/org/apache/spark/JsonTestUtils.scala new file mode 100644 index 0000000000000..ba367cd476146 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/JsonTestUtils.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark + +import org.json4s._ +import org.json4s.jackson.JsonMethods + +trait JsonTestUtils { + def assertValidDataInJson(validateJson: JValue, expectedJson: JValue) { + val Diff(c, a, d) = validateJson.diff(expectedJson) + val validatePretty = JsonMethods.pretty(validateJson) + val expectedPretty = JsonMethods.pretty(expectedJson) + val errorMessage = s"Expected:\n$expectedPretty\nFound:\n$validatePretty" + import org.scalactic.TripleEquals._ + assert(c === JNothing, s"$errorMessage\nChanged:\n${JsonMethods.pretty(c)}") + assert(a === JNothing, s"$errorMessage\nAdded:\n${JsonMethods.pretty(a)}") + assert(d === JNothing, s"$errorMessage\nDeleted:\n${JsonMethods.pretty(d)}") + } + +} diff --git a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala index b58d62567afe1..e04a79284175c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala @@ -28,9 +28,9 @@ import org.scalatest.FunSuite import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master.{ApplicationInfo, DriverInfo, RecoveryState, WorkerInfo} import org.apache.spark.deploy.worker.{DriverRunner, ExecutorRunner} -import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.{JsonTestUtils, SecurityManager, SparkConf} -class JsonProtocolSuite extends FunSuite { +class JsonProtocolSuite extends FunSuite with JsonTestUtils { test("writeApplicationInfo") { val output = JsonProtocol.writeApplicationInfo(createAppInfo()) @@ -136,16 +136,6 @@ class JsonProtocolSuite extends FunSuite { case e: JsonParseException => fail("Invalid Json detected", e) } } - - def assertValidDataInJson(validateJson: JValue, expectedJson: JValue) { - val Diff(c, a, d) = validateJson diff expectedJson - val validatePretty = JsonMethods.pretty(validateJson) - val expectedPretty = JsonMethods.pretty(expectedJson) - val errorMessage = s"Expected:\n$expectedPretty\nFound:\n$validatePretty" - assert(c === JNothing, s"$errorMessage\nChanged:\n${JsonMethods.pretty(c)}") - assert(a === JNothing, s"$errorMessage\nAdded:\n${JsonMethods.pretty(a)}") - assert(d === JNothing, s"$errorMessage\nDelected:\n${JsonMethods.pretty(d)}") - } } object JsonConstants { diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 71ba9c18257b8..318ab5dbc4804 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -14,22 +14,164 @@ * See the 
License for the specific language governing permissions and * limitations under the License. */ - package org.apache.spark.deploy.history -import javax.servlet.http.HttpServletRequest - -import scala.collection.mutable +import java.io.{File, FileInputStream, FileWriter, IOException} +import java.net.{HttpURLConnection, URL} +import javax.servlet.http.{HttpServletRequest, HttpServletResponse} -import org.apache.hadoop.fs.Path -import org.mockito.Mockito.{when} -import org.scalatest.FunSuite -import org.scalatest.Matchers +import org.apache.commons.io.{FileUtils, IOUtils} +import org.mockito.Mockito.when +import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} import org.scalatest.mock.MockitoSugar +import org.apache.spark.{JsonTestUtils, SecurityManager, SparkConf} import org.apache.spark.ui.SparkUI -class HistoryServerSuite extends FunSuite with Matchers with MockitoSugar { +/** + * A collection of tests against the history server, including comparing responses from the json + * metrics api to a set of known "golden files". If new endpoints / parameters are added, + * cases should be added to this test suite. The expected outcomes can be generated by running + * the HistoryServerSuite.main. Note that this will blindly generate new expectation files matching + * the current behavior -- the developer must verify that behavior is correct. + * + * Similarly, if the behavior is changed, HistoryServerSuite.main can be run to update the + * expectations. However, in general this should be done with extreme caution, as the metrics + * are considered part of Spark's public api. + */ +class HistoryServerSuite extends FunSuite with BeforeAndAfter with Matchers with MockitoSugar + with JsonTestUtils { + + private val logDir = new File("src/test/resources/spark-events") + private val expRoot = new File("src/test/resources/HistoryServerExpectations/") + + private var provider: FsHistoryProvider = null + private var server: HistoryServer = null + private var port: Int = -1 + + def init(): Unit = { + val conf = new SparkConf() + .set("spark.history.fs.logDirectory", logDir.getAbsolutePath) + .set("spark.history.fs.updateInterval", "0") + .set("spark.testing", "true") + provider = new FsHistoryProvider(conf) + provider.checkForLogs() + val securityManager = new SecurityManager(conf) + + server = new HistoryServer(conf, provider, securityManager, 18080) + server.initialize() + server.bind() + port = server.boundPort + } + + def stop(): Unit = { + server.stop() + } + + before { + init() + } + + after { + stop() + } + + val cases = Seq( + "application list json" -> "applications", + "completed app list json" -> "applications?status=completed", + "running app list json" -> "applications?status=running", + "minDate app list json" -> "applications?minDate=2015-02-10", + "maxDate app list json" -> "applications?maxDate=2015-02-10", + "maxDate2 app list json" -> "applications?maxDate=2015-02-03T10:42:40.000CST", + "one app json" -> "applications/local-1422981780767", + "one app multi-attempt json" -> "applications/local-1426533911241", + "job list json" -> "applications/local-1422981780767/jobs", + "job list from multi-attempt app json(1)" -> "applications/local-1426533911241/1/jobs", + "job list from multi-attempt app json(2)" -> "applications/local-1426533911241/2/jobs", + "one job json" -> "applications/local-1422981780767/jobs/0", + "succeeded job list json" -> "applications/local-1422981780767/jobs?status=succeeded", + "succeeded&failed job list json" ->
"applications/local-1422981780767/jobs?status=succeeded&status=failed", + "executor list json" -> "applications/local-1422981780767/executors", + "stage list json" -> "applications/local-1422981780767/stages", + "complete stage list json" -> "applications/local-1422981780767/stages?status=complete", + "failed stage list json" -> "applications/local-1422981780767/stages?status=failed", + "one stage json" -> "applications/local-1422981780767/stages/1", + "one stage attempt json" -> "applications/local-1422981780767/stages/1/0", + + "stage task summary w shuffle write" + -> "applications/local-1430917381534/stages/0/0/taskSummary", + "stage task summary w shuffle read" + -> "applications/local-1430917381534/stages/1/0/taskSummary", + "stage task summary w/ custom quantiles" -> + "applications/local-1430917381534/stages/0/0/taskSummary?quantiles=0.01,0.5,0.99", + + "stage task list" -> "applications/local-1430917381534/stages/0/0/taskList", + "stage task list w/ offset & length" -> + "applications/local-1430917381534/stages/0/0/taskList?offset=10&length=50", + "stage task list w/ sortBy" -> + "applications/local-1430917381534/stages/0/0/taskList?sortBy=DECREASING_RUNTIME", + "stage task list w/ sortBy short names: -runtime" -> + "applications/local-1430917381534/stages/0/0/taskList?sortBy=-runtime", + "stage task list w/ sortBy short names: runtime" -> + "applications/local-1430917381534/stages/0/0/taskList?sortBy=runtime", + + "stage list with accumulable json" -> "applications/local-1426533911241/1/stages", + "stage with accumulable json" -> "applications/local-1426533911241/1/stages/0/0", + "stage task list from multi-attempt app json(1)" -> + "applications/local-1426533911241/1/stages/0/0/taskList", + "stage task list from multi-attempt app json(2)" -> + "applications/local-1426533911241/2/stages/0/0/taskList", + + "rdd list storage json" -> "applications/local-1422981780767/storage/rdd", + "one rdd storage json" -> "applications/local-1422981780767/storage/rdd/0" + ) + + // run a bunch of characterization tests -- just verify the behavior is the same as what is saved + // in the test resource folder + cases.foreach { case (name, path) => + test(name) { + val (code, jsonOpt, errOpt) = getContentAndCode(path) + code should be (HttpServletResponse.SC_OK) + jsonOpt should be ('defined) + errOpt should be (None) + val json = jsonOpt.get + val exp = IOUtils.toString(new FileInputStream( + new File(expRoot, HistoryServerSuite.sanitizePath(name) + "_expectation.json"))) + // compare the ASTs so formatting differences don't cause failures + import org.json4s._ + import org.json4s.jackson.JsonMethods._ + val jsonAst = parse(json) + val expAst = parse(exp) + assertValidDataInJson(jsonAst, expAst) + } + } + + test("response codes on bad paths") { + val badAppId = getContentAndCode("applications/foobar") + badAppId._1 should be (HttpServletResponse.SC_NOT_FOUND) + badAppId._3 should be (Some("unknown app: foobar")) + + val badStageId = getContentAndCode("applications/local-1422981780767/stages/12345") + badStageId._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStageId._3 should be (Some("unknown stage: 12345")) + + val badStageAttemptId = getContentAndCode("applications/local-1422981780767/stages/1/1") + badStageAttemptId._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStageAttemptId._3 should be (Some("unknown attempt for stage 1. 
Found attempts: [0]")) + + val badStageId2 = getContentAndCode("applications/local-1422981780767/stages/flimflam") + badStageId2._1 should be (HttpServletResponse.SC_NOT_FOUND) + // will take some mucking w/ jersey to get a better error msg in this case + + val badQuantiles = getContentAndCode( + "applications/local-1430917381534/stages/0/0/taskSummary?quantiles=foo,0.1") + badQuantiles._1 should be (HttpServletResponse.SC_BAD_REQUEST) + badQuantiles._3 should be (Some("Bad value for parameter \"quantiles\". Expected a double, " + + "got \"foo\"")) + + getContentAndCode("foobar")._1 should be (HttpServletResponse.SC_NOT_FOUND) + } test("generate history page with relative links") { val historyServer = mock[HistoryServer] @@ -54,4 +196,75 @@ class HistoryServerSuite extends FunSuite with Matchers with MockitoSugar { } yield (attrs.toString) justHrefs should contain(link) } + + def getContentAndCode(path: String, port: Int = port): (Int, Option[String], Option[String]) = { + HistoryServerSuite.getContentAndCode(new URL(s"http://localhost:$port/json/v1/$path")) + } + + def getUrl(path: String): String = { + HistoryServerSuite.getUrl(new URL(s"http://localhost:$port/json/v1/$path")) + } + + def generateExpectation(name: String, path: String): Unit = { + val json = getUrl(path) + val file = new File(expRoot, HistoryServerSuite.sanitizePath(name) + "_expectation.json") + val out = new FileWriter(file) + out.write(json) + out.close() + } +} + +object HistoryServerSuite { + def main(args: Array[String]): Unit = { + // generate the "expected" results for the characterization tests. Just blindly assume the + // current behavior is correct, and write out the returned json to the test/resource files + + val suite = new HistoryServerSuite + FileUtils.deleteDirectory(suite.expRoot) + suite.expRoot.mkdirs() + try { + suite.init() + suite.cases.foreach { case (name, path) => + suite.generateExpectation(name, path) + } + } finally { + suite.stop() + } + } + + def getContentAndCode(url: URL): (Int, Option[String], Option[String]) = { + val connection = url.openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod("GET") + connection.connect() + val code = connection.getResponseCode() + val inString = try { + val in = Option(connection.getInputStream()) + in.map(IOUtils.toString) + } catch { + case io: IOException => None + } + val errString = try { + val err = Option(connection.getErrorStream()) + err.map(IOUtils.toString) + } catch { + case io: IOException => None + } + (code, inString, errString) + } + + + def sanitizePath(path: String): String = { + // this doesn't need to be perfect, just good enough to avoid collisions + path.replaceAll("\\W", "_") + } + + def getUrl(path: URL): String = { + val (code, resultOpt, error) = getContentAndCode(path) + if (code == 200) { + resultOpt.get + } else { + throw new RuntimeException( + "got code: " + code + " when getting " + path + " w/ error: " + error) + } + } } diff --git a/core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamTest.scala b/core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamTest.scala new file mode 100644 index 0000000000000..5274df904d395 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamTest.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import org.scalatest.{Matchers, FunSuite} + +class SimpleDateParamTest extends FunSuite with Matchers { + + test("date parsing") { + new SimpleDateParam("2015-02-20T23:21:17.190GMT").timestamp should be (1424474477190L) + new SimpleDateParam("2015-02-20T17:21:17.190CST").timestamp should be (1424474477190L) + new SimpleDateParam("2015-02-20").timestamp should be (1424390400000L) // GMT + } + +} diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index d53d7f3ba5ae7..117b2c3960820 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -18,11 +18,13 @@ package org.apache.spark.ui import java.net.{HttpURLConnection, URL} -import javax.servlet.http.HttpServletRequest +import javax.servlet.http.{HttpServletResponse, HttpServletRequest} import scala.collection.JavaConversions._ import scala.xml.Node +import org.json4s._ +import org.json4s.jackson.JsonMethods import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.openqa.selenium.{By, WebDriver} import org.scalatest._ @@ -33,8 +35,9 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.LocalSparkContext._ import org.apache.spark._ import org.apache.spark.api.java.StorageLevels +import org.apache.spark.deploy.history.HistoryServerSuite import org.apache.spark.shuffle.FetchFailedException - +import org.apache.spark.status.api.v1.{JacksonMessageWriter, StageStatus} /** * Selenium tests for the Spark Web UI. 
@@ -42,6 +45,8 @@ import org.apache.spark.shuffle.FetchFailedException class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ + implicit val formats = DefaultFormats + override def beforeAll(): Unit = { webDriver = new HtmlUnitDriver @@ -76,28 +81,42 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before val rdd = sc.parallelize(Seq(1, 2, 3)) rdd.persist(StorageLevels.DISK_ONLY).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage") + goToUi(ui, "/storage") val tableRowText = findAll(cssSelector("#storage-by-rdd-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.DISK_ONLY.description) } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage/rdd/?id=0") + goToUi(ui, "/storage/rdd/?id=0") val tableRowText = findAll(cssSelector("#rdd-storage-by-block-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.DISK_ONLY.description) } + val storageJson = getJson(ui, "storage/rdd") + storageJson.children.length should be (1) + (storageJson \ "storageLevel").extract[String] should be (StorageLevels.DISK_ONLY.description) + val rddJson = getJson(ui, "storage/rdd/0") + (rddJson \ "storageLevel").extract[String] should be (StorageLevels.DISK_ONLY.description) + rdd.unpersist() rdd.persist(StorageLevels.MEMORY_ONLY).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage") + goToUi(ui, "/storage") val tableRowText = findAll(cssSelector("#storage-by-rdd-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.MEMORY_ONLY.description) } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage/rdd/?id=0") + goToUi(ui, "/storage/rdd/?id=0") val tableRowText = findAll(cssSelector("#rdd-storage-by-block-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.MEMORY_ONLY.description) } + + val updatedStorageJson = getJson(ui, "storage/rdd") + updatedStorageJson.children.length should be (1) + (updatedStorageJson \ "storageLevel").extract[String] should be ( + StorageLevels.MEMORY_ONLY.description) + val updatedRddJson = getJson(ui, "storage/rdd/0") + (updatedRddJson \ "storageLevel").extract[String] should be ( + StorageLevels.MEMORY_ONLY.description) } } @@ -108,10 +127,13 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before sc.parallelize(1 to 10).map { x => throw new Exception()}.collect() } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") find(id("active")) should be(None) // Since we hide empty tables find(id("failed")).get.text should be("Failed Stages (1)") } + val stageJson = getJson(sc.ui.get, "stages") + stageJson.children.length should be (1) + (stageJson \ "status").extract[String] should be (StageStatus.FAILED.name()) // Regression test for SPARK-2105 class NotSerializable @@ -120,12 +142,15 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before sc.parallelize(1 to 10).map { x => unserializableObject}.collect() } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") find(id("active")) should be(None) // Since we hide empty tables // 
The failure occurs before the stage becomes active, hence we should still show only one // failed stage, not two: find(id("failed")).get.text should be("Failed Stages (1)") } + + val updatedStageJson = getJson(sc.ui.get, "stages") + updatedStageJson should be (stageJson) } } @@ -138,7 +163,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before withSpark(newSparkContext(killEnabled = true)) { sc => runSlowJob(sc) eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") assert(hasKillLink) } } @@ -146,7 +171,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before withSpark(newSparkContext(killEnabled = false)) { sc => runSlowJob(sc) eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") assert(!hasKillLink) } } @@ -157,7 +182,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before // If no job has been run in a job group, then "(Job Group)" should not appear in the header sc.parallelize(Seq(1, 2, 3)).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") val tableHeaders = findAll(cssSelector("th")).map(_.text).toSeq tableHeaders should not contain "Job Id (Job Group)" } @@ -165,10 +190,22 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before sc.setJobGroup("my-job-group", "my-job-group-description") sc.parallelize(Seq(1, 2, 3)).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") val tableHeaders = findAll(cssSelector("th")).map(_.text).toSeq tableHeaders should contain ("Job Id (Job Group)") } + + val jobJson = getJson(sc.ui.get, "jobs") + for { + job @ JObject(_) <- jobJson + JInt(jobId) <- job \ "jobId" + jobGroup = job \ "jobGroup" + } { + jobId.toInt match { + case 0 => jobGroup should be (JNothing) + case 1 => jobGroup should be (JString("my-job-group")) + } + } } } @@ -195,7 +232,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } mappedData.count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") find(cssSelector(".stage-progress-cell")).get.text should be ("2/2 (1 failed)") // Ideally, the following test would pass, but currently we overcount completed tasks // if task recomputations occur: @@ -204,6 +241,32 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before // of completed tasks may be higher: find(cssSelector(".progress-cell .progress")).get.text should be ("3/2 (1 failed)") } + val jobJson = getJson(sc.ui.get, "jobs") + (jobJson \ "numTasks").extract[Int]should be (2) + (jobJson \ "numCompletedTasks").extract[Int] should be (3) + (jobJson \ "numFailedTasks").extract[Int] should be (1) + (jobJson \ "numCompletedStages").extract[Int] should be (2) + (jobJson \ "numFailedStages").extract[Int] should be (1) + val stageJson = getJson(sc.ui.get, "stages") + + for { + stage @ JObject(_) <- stageJson + JString(status) <- stage \ "status" + JInt(stageId) <- stage \ "stageId" + JInt(attemptId) <- stage \ "attemptId" + } { + val exp = if (attemptId == 0 && stageId == 1) StageStatus.FAILED else StageStatus.COMPLETE + status should be (exp.name()) + 
} + + for { + stageId <- 0 to 1 + attemptId <- 0 to 1 + } { + val exp = if (attemptId == 0 && stageId == 1) StageStatus.FAILED else StageStatus.COMPLETE + val stageJson = getJson(sc.ui.get, s"stages/$stageId/$attemptId") + (stageJson \ "status").extract[String] should be (exp.name()) + } } } @@ -218,7 +281,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before // Start the job: rdd.countAsync() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs/job/?id=0") + goToUi(sc, "/jobs/job/?id=0") find(id("active")).get.text should be ("Active Stages (1)") find(id("pending")).get.text should be ("Pending Stages (2)") // Essentially, we want to check that none of the stage rows show @@ -244,7 +307,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before rdd.count() rdd.count() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") // The completed jobs table should have two rows. The first row will be the most recent job: val firstRow = find(cssSelector("tbody tr")).get.underlying val firstRowColumns = firstRow.findElements(By.tagName("td")) @@ -271,7 +334,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before rdd.count() rdd.count() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs/job/?id=1") + goToUi(sc, "/jobs/job/?id=1") find(id("pending")) should be (None) find(id("active")) should be (None) find(id("failed")) should be (None) @@ -299,7 +362,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before rdd.count() rdd.count() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") findAll(cssSelector("tbody tr a")).foreach { link => link.text.toLowerCase should include ("count") link.text.toLowerCase should not include "unknown" @@ -321,7 +384,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } sparkUI.attachTab(newTab) eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/")) + goToUi(sc, "") find(cssSelector("""ul li a[href*="jobs"]""")) should not be(None) find(cssSelector("""ul li a[href*="stages"]""")) should not be(None) find(cssSelector("""ul li a[href*="storage"]""")) should not be(None) @@ -330,12 +393,12 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } eventually(timeout(10 seconds), interval(50 milliseconds)) { // check whether new page exists - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/foo") + goToUi(sc, "/foo") find(cssSelector("b")).get.text should include ("html magic") } sparkUI.detachTab(newTab) eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/")) + goToUi(sc, "") find(cssSelector("""ul li a[href*="jobs"]""")) should not be(None) find(cssSelector("""ul li a[href*="stages"]""")) should not be(None) find(cssSelector("""ul li a[href*="storage"]""")) should not be(None) @@ -344,7 +407,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } eventually(timeout(10 seconds), interval(50 milliseconds)) { // check new page not exist - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/foo") + goToUi(sc, "/foo") find(cssSelector("b")) should 
be(None) } } @@ -371,4 +434,163 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } } } + + test("stage & job retention") { + val conf = new SparkConf() + .setMaster("local") + .setAppName("test") + .set("spark.ui.enabled", "true") + .set("spark.ui.port", "0") + .set("spark.ui.retainedStages", "3") + .set("spark.ui.retainedJobs", "2") + val sc = new SparkContext(conf) + assert(sc.ui.isDefined) + + withSpark(sc) { sc => + // run a few jobs & stages ... + (0 until 5).foreach { idx => + // NOTE: if we reverse the order, things don't really behave nicely + // we lose the stage for a job we keep, and then the job doesn't know + // about its last stage + sc.parallelize(idx to (idx + 3)).map(identity).groupBy(identity).map(identity) + .groupBy(identity).count() + sc.parallelize(idx to (idx + 3)).collect() + } + + val expJobInfo = Seq( + ("9", "collect"), + ("8", "count") + ) + + eventually(timeout(1 second), interval(50 milliseconds)) { + goToUi(sc, "/jobs") + // The completed jobs table should have two rows. The first row will be the most recent job: + find("completed-summary").get.text should be ("Completed Jobs: 10, only showing 2") + find("completed").get.text should be ("Completed Jobs (10, only showing 2)") + val rows = findAll(cssSelector("tbody tr")).toIndexedSeq.map{_.underlying} + rows.size should be (expJobInfo.size) + for { + (row, idx) <- rows.zipWithIndex + columns = row.findElements(By.tagName("td")) + id = columns(0).getText() + description = columns(1).getText() + } { + id should be (expJobInfo(idx)._1) + description should include (expJobInfo(idx)._2) + } + } + + val jobsJson = getJson(sc.ui.get, "jobs") + jobsJson.children.size should be (expJobInfo.size) + for { + (job @ JObject(_),idx) <- jobsJson.children.zipWithIndex + id = (job \ "jobId").extract[String] + name = (job \ "name").extract[String] + } { + withClue(s"idx = $idx; id = $id; name = ${name.substring(0,20)}") { + id should be (expJobInfo(idx)._1) + name should include (expJobInfo(idx)._2) + } + } + + // what about when we query for a job that did exist, but has been cleared? 
+ goToUi(sc, "/jobs/job/?id=7") + find("no-info").get.text should be ("No information to display for job 7") + + val badJob = HistoryServerSuite.getContentAndCode(jsonUrl(sc.ui.get, "jobs/7")) + badJob._1 should be (HttpServletResponse.SC_NOT_FOUND) + badJob._2 should be (None) + badJob._3 should be (Some("unknown job: 7")) + + val expStageInfo = Seq( + ("19", "collect"), + ("18", "count"), + ("17", "groupBy") + ) + + eventually(timeout(1 second), interval(50 milliseconds)) { + goToUi(sc, "/stages") + find("completed-summary").get.text should be ("Completed Stages: 20, only showing 3") + find("completed").get.text should be ("Completed Stages (20, only showing 3)") + val rows = findAll(cssSelector("tbody tr")).toIndexedSeq.map{_.underlying} + rows.size should be (3) + for { + (row, idx) <- rows.zipWithIndex + columns = row.findElements(By.tagName("td")) + id = columns(0).getText() + description = columns(1).getText() + } { + id should be (expStageInfo(idx)._1) + description should include (expStageInfo(idx)._2) + } + } + + val stagesJson = getJson(sc.ui.get, "stages") + stagesJson.children.size should be (3) + for { + (stage @ JObject(_), idx) <- stagesJson.children.zipWithIndex + id = (stage \ "stageId").extract[String] + name = (stage \ "name").extract[String] + } { + id should be (expStageInfo(idx)._1) + name should include (expStageInfo(idx)._2) + } + + // nonexistent stage + + goToUi(sc, "/stages/stage/?id=12&attempt=0") + find("no-info").get.text should be ("No information to display for Stage 12 (Attempt 0)") + val badStage = HistoryServerSuite.getContentAndCode(jsonUrl(sc.ui.get,"stages/12/0")) + badStage._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStage._2 should be (None) + badStage._3 should be (Some("unknown stage: 12")) + + val badAttempt = HistoryServerSuite.getContentAndCode(jsonUrl(sc.ui.get,"stages/19/15")) + badAttempt._1 should be (HttpServletResponse.SC_NOT_FOUND) + badAttempt._2 should be (None) + badAttempt._3 should be (Some("unknown attempt for stage 19. 
Found attempts: [0]")) + + val badStageAttemptList = HistoryServerSuite.getContentAndCode( + jsonUrl(sc.ui.get, "stages/12")) + badStageAttemptList._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStageAttemptList._2 should be (None) + badStageAttemptList._3 should be (Some("unknown stage: 12")) + } + } + + test("live UI json application list") { + withSpark(newSparkContext()) { sc => + val appListRawJson = HistoryServerSuite.getUrl(new URL( + sc.ui.get.appUIAddress + "/json/v1/applications")) + val appListJsonAst = JsonMethods.parse(appListRawJson) + appListJsonAst.children.length should be (1) + val attempts = (appListJsonAst \ "attempts").children + attempts.size should be (1) + (attempts(0) \ "completed").extract[Boolean] should be (false) + parseDate(attempts(0) \ "startTime") should be (sc.startTime) + parseDate(attempts(0) \ "endTime") should be (-1) + val oneAppJsonAst = getJson(sc.ui.get, "") + oneAppJsonAst should be (appListJsonAst.children(0)) + } + } + + def goToUi(sc: SparkContext, path: String): Unit = { + goToUi(sc.ui.get, path) + } + + def goToUi(ui: SparkUI, path: String): Unit = { + go to (ui.appUIAddress.stripSuffix("/") + path) + } + + def parseDate(json: JValue): Long = { + JacksonMessageWriter.makeISODateFormat.parse(json.extract[String]).getTime + } + + def getJson(ui: SparkUI, path: String): JValue = { + JsonMethods.parse(HistoryServerSuite.getUrl(jsonUrl(ui, path))) + } + + def jsonUrl(ui: SparkUI, path: String): URL = { + new URL(ui.appUIAddress + "/json/v1/applications/test/" + path) + } } diff --git a/docs/monitoring.md b/docs/monitoring.md index 8a85928d6d44d..1e0fc150862fb 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -174,6 +174,80 @@ making it easy to identify slow tasks, data skew, etc. Note that the history server only displays completed Spark jobs. One way to signal the completion of a Spark job is to stop the Spark Context explicitly (`sc.stop()`), or in Python using the `with SparkContext() as sc:` to handle the Spark Context setup and tear down, and still show the job history on the UI. +## REST API + +In addition to viewing the metrics in the UI, they are also available as JSON. This gives developers +an easy way to create new visualizations and monitoring tools for Spark. The JSON is available for +both running applications, and in the history server. The endpoints are mounted at `/json/v1`. Eg., +for the history server, they would typically be accessible at `http://<server-url>:18080/json/v1`, and +for a running application, at `http://localhost:4040/json/v1`. 
+ +<table class="table"> + <tr><th>Endpoint</th><th>Meaning</th></tr> + <tr> + <td><code>/applications</code></td> + <td>A list of all applications</td> + </tr> + <tr> + <td><code>/applications/[app-id]/jobs</code></td> + <td>A list of all jobs for a given application</td> + </tr> + <tr> + <td><code>/applications/[app-id]/jobs/[job-id]</code></td> + <td>Details for the given job</td> + </tr> + <tr> + <td><code>/applications/[app-id]/stages</code></td> + <td>A list of all stages for a given application</td> + </tr> + <tr> + <td><code>/applications/[app-id]/stages/[stage-id]</code></td> + <td>A list of all attempts for the given stage</td> + </tr> + <tr> + <td><code>/applications/[app-id]/stages/[stage-id]/[stage-attempt-id]</code></td> + <td>Details for the given stage attempt</td> + </tr> + <tr> + <td><code>/applications/[app-id]/stages/[stage-id]/[stage-attempt-id]/taskSummary</code></td> + <td>Summary metrics of all tasks in the given stage attempt</td> + </tr> + <tr> + <td><code>/applications/[app-id]/stages/[stage-id]/[stage-attempt-id]/taskList</code></td> + <td>A list of all tasks for the given stage attempt</td> + </tr> + <tr> + <td><code>/applications/[app-id]/executors</code></td> + <td>A list of all executors for the given application</td> + </tr> + <tr> + <td><code>/applications/[app-id]/storage/rdd</code></td> + <td>A list of stored RDDs for the given application</td> + </tr> + <tr> + <td><code>/applications/[app-id]/storage/rdd/[rdd-id]</code></td> + <td>Details for the storage status of a given RDD</td> + </tr> +</table> + +When running on YARN, each application has multiple attempts, so `[app-id]` is actually +`[app-id]/[attempt-id]` in all cases. + +These endpoints have been strongly versioned to make it easier to develop applications on top. + In particular, Spark guarantees: + +* Endpoints will never be removed from one version +* Individual fields will never be removed for any given endpoint +* New endpoints may be added +* New fields may be added to existing endpoints +* New versions of the api may be added in the future at a separate endpoint (eg., `json/v2`). New versions are *not* required to be backwards compatible. +* Api versions may be dropped, but only after at least one minor release of co-existing with a new api version + +Note that even when examining the UI of a running application, the `applications/[app-id]` portion is +still required, though there is only one application available. Eg. to see the list of jobs for the +running app, you would go to `http://localhost:4040/json/v1/applications/[app-id]/jobs`. This is to +keep the paths consistent in both modes.
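To make the endpoint table above concrete, here is a minimal sketch (not part of the patch) that walks the API of a running application: it lists the applications, takes the first id, and prints that application's jobs. It assumes Python 2, the default driver UI port 4040 described above, and that each entry in the application listing exposes an `id` field; the `jobId` and `name` fields are the ones exercised by the UI tests in this patch.

```python
import json
import urllib2

base = "http://localhost:4040/json/v1"

# List the applications served by the running driver UI.
apps = json.loads(urllib2.urlopen(base + "/applications").read())
app_id = apps[0]["id"]  # "id" field assumed; adjust if the listing differs

# Fetch and print the jobs for that application.
jobs = json.loads(urllib2.urlopen("%s/applications/%s/jobs" % (base, app_id)).read())
for job in jobs:
    print("%s: %s" % (job["jobId"], job["name"]))
```

The same sketch works against a history server by swapping the base URL for `http://<server-url>:18080/json/v1`.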
+ # Metrics Spark has a configurable metrics system based on the diff --git a/pom.xml b/pom.xml index 12ad7c5be4e40..94cf28332cf47 100644 --- a/pom.xml +++ b/pom.xml @@ -623,6 +623,18 @@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-server</artifactId> + <version>1.9</version> + <scope>${hadoop.deps.scope}</scope> + </dependency> + <dependency> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-core</artifactId> + <version>1.9</version> + <scope>${hadoop.deps.scope}</scope> + </dependency> <dependency> <groupId>org.scala-lang</groupId> <artifactId>scala-compiler</artifactId> From f5ff4a84c4c75143086aae7d38730156bee35933 Mon Sep 17 00:00:00 2001 From: Burak Yavuz <brkyvz@gmail.com> Date: Fri, 8 May 2015 11:14:39 -0700 Subject: [PATCH 035/320] [SPARK-7383] [ML] Feature Parity in PySpark for ml.features Implemented python wrappers for Scala functions that don't exist in `ml.features` Author: Burak Yavuz <brkyvz@gmail.com> Closes #5991 from brkyvz/ml-feat-PR and squashes the following commits: adcca55 [Burak Yavuz] add regex tokenizer to __all__ b91cb44 [Burak Yavuz] addressed comments bd39fd2 [Burak Yavuz] remove addition b82bd7c [Burak Yavuz] Parity in PySpark for ml.features --- .../ml/feature/PolynomialExpansion.scala | 2 +- .../apache/spark/ml/feature/Tokenizer.scala | 2 +- python/pyspark/ml/feature.py | 797 +++++++++++++++++- .../ml/param/_shared_params_code_gen.py | 4 +- python/pyspark/ml/param/shared.py | 89 ++ 5 files changed, 851 insertions(+), 43 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 63e190c8aae53..9e6177ca27e4a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.types.DataType * which is available at [[http://en.wikipedia.org/wiki/Polynomial_expansion]], "In mathematics, an * expansion of a product of sums expresses it as a sum of products by using the fact that * multiplication distributes over addition". Take a 2-variable feature vector as an example: - * `(x, y)`, if we want to expand it with degree 2, then we get `(x, y, x * x, x * y, y * y)`. + * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. */ @AlphaComponent class PolynomialExpansion extends UnaryTransformer[Vector, Vector, PolynomialExpansion] { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 2863b7621526e..649c217b16590 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -42,7 +42,7 @@ class Tokenizer extends UnaryTransformer[String, Seq[String], Tokenizer] { /** * :: AlphaComponent :: - * A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default) + * A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default) * or using it to split the text (set matching to false). Optional parameters also allow filtering * tokens using a minimal length. * It returns an array of strings that can be empty. 
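The two tokenization modes mentioned in the `RegexTokenizer` scaladoc above are easy to confuse, so here is a small sketch (not part of the patch) using the PySpark `RegexTokenizer` wrapper added below; it assumes an active `sqlContext`, as in the module's doctests, and the comments show the tokens each configuration is expected to produce.

```python
from pyspark.ml.feature import RegexTokenizer

df = sqlContext.createDataFrame([("foo,bar baz",)], ["text"])

# gaps=False (the default): the pattern describes the tokens themselves,
# so every non-overlapping match of "\w+" becomes a token.
matcher = RegexTokenizer(pattern="\\w+", gaps=False, inputCol="text", outputCol="words")
matcher.transform(df).head().words   # expected: [u'foo', u'bar', u'baz']

# gaps=True: the pattern describes the separators, so the text is split on it;
# splitting on whitespace alone keeps the comma inside the first token.
splitter = RegexTokenizer(pattern="\\s+", gaps=True, inputCol="text", outputCol="words")
splitter.transform(df).head().words  # expected: [u'foo,bar', u'baz']
```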
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 705a368192c24..f35bc1463d51b 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -16,61 +16,72 @@ # from pyspark.rdd import ignore_unicode_prefix -from pyspark.ml.param.shared import HasInputCol, HasInputCols, HasOutputCol, HasNumFeatures +from pyspark.ml.param.shared import * from pyspark.ml.util import keyword_only -from pyspark.ml.wrapper import JavaTransformer +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaTransformer from pyspark.mllib.common import inherit_doc -__all__ = ['Tokenizer', 'HashingTF', 'VectorAssembler'] +__all__ = ['Binarizer', 'HashingTF', 'IDF', 'IDFModel', 'Normalizer', 'OneHotEncoder', + 'PolynomialExpansion', 'RegexTokenizer', 'StandardScaler', 'StandardScalerModel', + 'StringIndexer', 'StringIndexerModel', 'Tokenizer', 'VectorAssembler', 'VectorIndexer', + 'Word2Vec', 'Word2VecModel'] @inherit_doc -@ignore_unicode_prefix -class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): +class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): """ - A tokenizer that converts the input string to lowercase and then - splits it by white spaces. + Binarize a column of continuous features given a threshold. - >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(text="a b c")]).toDF() - >>> tokenizer = Tokenizer(inputCol="text", outputCol="words") - >>> tokenizer.transform(df).head() - Row(text=u'a b c', words=[u'a', u'b', u'c']) - >>> # Change a parameter. - >>> tokenizer.setParams(outputCol="tokens").transform(df).head() - Row(text=u'a b c', tokens=[u'a', u'b', u'c']) - >>> # Temporarily modify a parameter. - >>> tokenizer.transform(df, {tokenizer.outputCol: "words"}).head() - Row(text=u'a b c', words=[u'a', u'b', u'c']) - >>> tokenizer.transform(df).head() - Row(text=u'a b c', tokens=[u'a', u'b', u'c']) - >>> # Must use keyword arguments to specify params. - >>> tokenizer.setParams("text") - Traceback (most recent call last): - ... - TypeError: Method setParams forces keyword arguments. + >>> df = sqlContext.createDataFrame([(0.5,)], ["values"]) + >>> binarizer = Binarizer(threshold=1.0, inputCol="values", outputCol="features") + >>> binarizer.transform(df).head().features + 0.0 + >>> binarizer.setParams(outputCol="freqs").transform(df).head().freqs + 0.0 + >>> params = {binarizer.threshold: -0.5, binarizer.outputCol: "vector"} + >>> binarizer.transform(df, params).head().vector + 1.0 """ - _java_class = "org.apache.spark.ml.feature.Tokenizer" + _java_class = "org.apache.spark.ml.feature.Binarizer" + # a placeholder to make it appear in the generated doc + threshold = Param(Params._dummy(), "threshold", + "threshold in binary classification prediction, in range [0, 1]") @keyword_only - def __init__(self, inputCol=None, outputCol=None): + def __init__(self, threshold=0.0, inputCol=None, outputCol=None): """ - __init__(self, inputCol=None, outputCol=None) + __init__(self, threshold=0.0, inputCol=None, outputCol=None) """ - super(Tokenizer, self).__init__() + super(Binarizer, self).__init__() + self.threshold = Param(self, "threshold", + "threshold in binary classification prediction, in range [0, 1]") + self._setDefault(threshold=0.0) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @keyword_only - def setParams(self, inputCol=None, outputCol=None): + def setParams(self, threshold=0.0, inputCol=None, outputCol=None): """ - setParams(self, inputCol="input", outputCol="output") - Sets params for this Tokenizer. 
+ setParams(self, threshold=0.0, inputCol=None, outputCol=None) + Sets params for this Binarizer. """ kwargs = self.setParams._input_kwargs return self._set(**kwargs) + def setThreshold(self, value): + """ + Sets the value of :py:attr:`threshold`. + """ + self.paramMap[self.threshold] = value + return self + + def getThreshold(self): + """ + Gets the value of threshold or its default value. + """ + return self.getOrDefault(self.threshold) + @inherit_doc class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): @@ -78,8 +89,7 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): Maps a sequence of terms to their term frequencies using the hashing trick. - >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(words=["a", "b", "c"])]).toDF() + >>> df = sqlContext.createDataFrame([(["a", "b", "c"],)], ["words"]) >>> hashingTF = HashingTF(numFeatures=10, inputCol="words", outputCol="features") >>> hashingTF.transform(df).head().features SparseVector(10, {7: 1.0, 8: 1.0, 9: 1.0}) @@ -112,13 +122,520 @@ def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None): return self._set(**kwargs) +@inherit_doc +class IDF(JavaEstimator, HasInputCol, HasOutputCol): + """ + Compute the Inverse Document Frequency (IDF) given a collection of documents. + + >>> from pyspark.mllib.linalg import DenseVector + >>> df = sqlContext.createDataFrame([(DenseVector([1.0, 2.0]),), + ... (DenseVector([0.0, 1.0]),), (DenseVector([3.0, 0.2]),)], ["tf"]) + >>> idf = IDF(minDocFreq=3, inputCol="tf", outputCol="idf") + >>> idf.fit(df).transform(df).head().idf + DenseVector([0.0, 0.0]) + >>> idf.setParams(outputCol="freqs").fit(df).transform(df).collect()[1].freqs + DenseVector([0.0, 0.0]) + >>> params = {idf.minDocFreq: 1, idf.outputCol: "vector"} + >>> idf.fit(df, params).transform(df).head().vector + DenseVector([0.2877, 0.0]) + """ + + _java_class = "org.apache.spark.ml.feature.IDF" + + # a placeholder to make it appear in the generated doc + minDocFreq = Param(Params._dummy(), "minDocFreq", + "minimum of documents in which a term should appear for filtering") + + @keyword_only + def __init__(self, minDocFreq=0, inputCol=None, outputCol=None): + """ + __init__(self, minDocFreq=0, inputCol=None, outputCol=None) + """ + super(IDF, self).__init__() + self.minDocFreq = Param(self, "minDocFreq", + "minimum of documents in which a term should appear for filtering") + self._setDefault(minDocFreq=0) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, minDocFreq=0, inputCol=None, outputCol=None): + """ + setParams(self, minDocFreq=0, inputCol=None, outputCol=None) + Sets params for this IDF. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setMinDocFreq(self, value): + """ + Sets the value of :py:attr:`minDocFreq`. + """ + self.paramMap[self.minDocFreq] = value + return self + + def getMinDocFreq(self): + """ + Gets the value of minDocFreq or its default value. + """ + return self.getOrDefault(self.minDocFreq) + + +class IDFModel(JavaModel): + """ + Model fitted by IDF. + """ + + +@inherit_doc +class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + Normalize a vector to have unit norm using the given p-norm. 
+ + >>> from pyspark.mllib.linalg import Vectors + >>> svec = Vectors.sparse(4, {1: 4.0, 3: 3.0}) + >>> df = sqlContext.createDataFrame([(Vectors.dense([3.0, -4.0]), svec)], ["dense", "sparse"]) + >>> normalizer = Normalizer(p=2.0, inputCol="dense", outputCol="features") + >>> normalizer.transform(df).head().features + DenseVector([0.6, -0.8]) + >>> normalizer.setParams(inputCol="sparse", outputCol="freqs").transform(df).head().freqs + SparseVector(4, {1: 0.8, 3: 0.6}) + >>> params = {normalizer.p: 1.0, normalizer.inputCol: "dense", normalizer.outputCol: "vector"} + >>> normalizer.transform(df, params).head().vector + DenseVector([0.4286, -0.5714]) + """ + + # a placeholder to make it appear in the generated doc + p = Param(Params._dummy(), "p", "the p norm value.") + + _java_class = "org.apache.spark.ml.feature.Normalizer" + + @keyword_only + def __init__(self, p=2.0, inputCol=None, outputCol=None): + """ + __init__(self, p=2.0, inputCol=None, outputCol=None) + """ + super(Normalizer, self).__init__() + self.p = Param(self, "p", "the p norm value.") + self._setDefault(p=2.0) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, p=2.0, inputCol=None, outputCol=None): + """ + setParams(self, p=2.0, inputCol=None, outputCol=None) + Sets params for this Normalizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setP(self, value): + """ + Sets the value of :py:attr:`p`. + """ + self.paramMap[self.p] = value + return self + + def getP(self): + """ + Gets the value of p or its default value. + """ + return self.getOrDefault(self.p) + + +@inherit_doc +class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): + """ + A one-hot encoder that maps a column of label indices to a column of binary vectors, with + at most a single one-value. By default, the binary vector has an element for each category, so + with 5 categories, an input value of 2.0 would map to an output vector of + (0.0, 0.0, 1.0, 0.0, 0.0). If includeFirst is set to false, the first category is omitted, so + the output vector for the previous example would be (0.0, 1.0, 0.0, 0.0) and an input value + of 0.0 would map to a vector of all zeros. Including the first category makes the vector columns + linearly dependent because they sum up to one. + + TODO: This method requires the use of StringIndexer first. Decouple them. 
+ + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> model = stringIndexer.fit(stringIndDf) + >>> td = model.transform(stringIndDf) + >>> encoder = OneHotEncoder(includeFirst=False, inputCol="indexed", outputCol="features") + >>> encoder.transform(td).head().features + SparseVector(2, {}) + >>> encoder.setParams(outputCol="freqs").transform(td).head().freqs + SparseVector(2, {}) + >>> params = {encoder.includeFirst: True, encoder.outputCol: "test"} + >>> encoder.transform(td, params).head().test + SparseVector(3, {0: 1.0}) + """ + + _java_class = "org.apache.spark.ml.feature.OneHotEncoder" + + # a placeholder to make it appear in the generated doc + includeFirst = Param(Params._dummy(), "includeFirst", "include first category") + + @keyword_only + def __init__(self, includeFirst=True, inputCol=None, outputCol=None): + """ + __init__(self, includeFirst=True, inputCol=None, outputCol=None) + """ + super(OneHotEncoder, self).__init__() + self.includeFirst = Param(self, "includeFirst", "include first category") + self._setDefault(includeFirst=True) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, includeFirst=True, inputCol=None, outputCol=None): + """ + setParams(self, includeFirst=True, inputCol=None, outputCol=None) + Sets params for this OneHotEncoder. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setIncludeFirst(self, value): + """ + Sets the value of :py:attr:`includeFirst`. + """ + self.paramMap[self.includeFirst] = value + return self + + def getIncludeFirst(self): + """ + Gets the value of includeFirst or its default value. + """ + return self.getOrDefault(self.includeFirst) + + +@inherit_doc +class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): + """ + Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion, + which is available at `http://en.wikipedia.org/wiki/Polynomial_expansion`, "In mathematics, an + expansion of a product of sums expresses it as a sum of products by using the fact that + multiplication distributes over addition". Take a 2-variable feature vector as an example: + `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([(Vectors.dense([0.5, 2.0]),)], ["dense"]) + >>> px = PolynomialExpansion(degree=2, inputCol="dense", outputCol="expanded") + >>> px.transform(df).head().expanded + DenseVector([0.5, 0.25, 2.0, 1.0, 4.0]) + >>> px.setParams(outputCol="test").transform(df).head().test + DenseVector([0.5, 0.25, 2.0, 1.0, 4.0]) + """ + + _java_class = "org.apache.spark.ml.feature.PolynomialExpansion" + + # a placeholder to make it appear in the generated doc + degree = Param(Params._dummy(), "degree", "the polynomial degree to expand (>= 1)") + + @keyword_only + def __init__(self, degree=2, inputCol=None, outputCol=None): + """ + __init__(self, degree=2, inputCol=None, outputCol=None) + """ + super(PolynomialExpansion, self).__init__() + self.degree = Param(self, "degree", "the polynomial degree to expand (>= 1)") + self._setDefault(degree=2) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, degree=2, inputCol=None, outputCol=None): + """ + setParams(self, degree=2, inputCol=None, outputCol=None) + Sets params for this PolynomialExpansion. 
+ """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setDegree(self, value): + """ + Sets the value of :py:attr:`degree`. + """ + self.paramMap[self.degree] = value + return self + + def getDegree(self): + """ + Gets the value of degree or its default value. + """ + return self.getOrDefault(self.degree) + + +@inherit_doc +@ignore_unicode_prefix +class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default) + or using it to split the text (set matching to false). Optional parameters also allow filtering + tokens using a minimal length. + It returns an array of strings that can be empty. + + >>> df = sqlContext.createDataFrame([("a b c",)], ["text"]) + >>> reTokenizer = RegexTokenizer(inputCol="text", outputCol="words") + >>> reTokenizer.transform(df).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> # Change a parameter. + >>> reTokenizer.setParams(outputCol="tokens").transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Temporarily modify a parameter. + >>> reTokenizer.transform(df, {reTokenizer.outputCol: "words"}).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> reTokenizer.transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Must use keyword arguments to specify params. + >>> reTokenizer.setParams("text") + Traceback (most recent call last): + ... + TypeError: Method setParams forces keyword arguments. + """ + + _java_class = "org.apache.spark.ml.feature.RegexTokenizer" + # a placeholder to make it appear in the generated doc + minTokenLength = Param(Params._dummy(), "minTokenLength", "minimum token length (>= 0)") + gaps = Param(Params._dummy(), "gaps", "Set regex to match gaps or tokens") + pattern = Param(Params._dummy(), "pattern", "regex pattern used for tokenizing") + + @keyword_only + def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + inputCol=None, outputCol=None): + """ + __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + inputCol=None, outputCol=None) + """ + super(RegexTokenizer, self).__init__() + self.minTokenLength = Param(self, "minLength", "minimum token length (>= 0)") + self.gaps = Param(self, "gaps", "Set regex to match gaps or tokens") + self.pattern = Param(self, "pattern", "regex pattern used for tokenizing") + self._setDefault(minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + inputCol=None, outputCol=None): + """ + setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + inputCol="input", outputCol="output") + Sets params for this RegexTokenizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setMinTokenLength(self, value): + """ + Sets the value of :py:attr:`minTokenLength`. + """ + self.paramMap[self.minTokenLength] = value + return self + + def getMinTokenLength(self): + """ + Gets the value of minTokenLength or its default value. + """ + return self.getOrDefault(self.minTokenLength) + + def setGaps(self, value): + """ + Sets the value of :py:attr:`gaps`. + """ + self.paramMap[self.gaps] = value + return self + + def getGaps(self): + """ + Gets the value of gaps or its default value. 
+ """ + return self.getOrDefault(self.gaps) + + def setPattern(self, value): + """ + Sets the value of :py:attr:`pattern`. + """ + self.paramMap[self.pattern] = value + return self + + def getPattern(self): + """ + Gets the value of pattern or its default value. + """ + return self.getOrDefault(self.pattern) + + +@inherit_doc +class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): + """ + Standardizes features by removing the mean and scaling to unit variance using column summary + statistics on the samples in the training set. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"]) + >>> standardScaler = StandardScaler(inputCol="a", outputCol="scaled") + >>> model = standardScaler.fit(df) + >>> model.transform(df).collect()[1].scaled + DenseVector([1.4142]) + """ + + _java_class = "org.apache.spark.ml.feature.StandardScaler" + + # a placeholder to make it appear in the generated doc + withMean = Param(Params._dummy(), "withMean", "Center data with mean") + withStd = Param(Params._dummy(), "withStd", "Scale to unit standard deviation") + + @keyword_only + def __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None): + """ + __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None) + """ + super(StandardScaler, self).__init__() + self.withMean = Param(self, "withMean", "Center data with mean") + self.withStd = Param(self, "withStd", "Scale to unit standard deviation") + self._setDefault(withMean=False, withStd=True) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None): + """ + setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None) + Sets params for this StandardScaler. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setWithMean(self, value): + """ + Sets the value of :py:attr:`withMean`. + """ + self.paramMap[self.withMean] = value + return self + + def getWithMean(self): + """ + Gets the value of withMean or its default value. + """ + return self.getOrDefault(self.withMean) + + def setWithStd(self, value): + """ + Sets the value of :py:attr:`withStd`. + """ + self.paramMap[self.withStd] = value + return self + + def getWithStd(self): + """ + Gets the value of withStd or its default value. + """ + return self.getOrDefault(self.withStd) + + +class StandardScalerModel(JavaModel): + """ + Model fitted by StandardScaler. + """ + + +@inherit_doc +class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol): + """ + A label indexer that maps a string column of labels to an ML column of label indices. + If the input column is numeric, we cast it to string and index the string values. + The indices are in [0, numLabels), ordered by label frequencies. + So the most frequent label gets index 0. + + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> model = stringIndexer.fit(stringIndDf) + >>> td = model.transform(stringIndDf) + >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]), + ... 
key=lambda x: x[0]) + [(0, 0.0), (1, 2.0), (2, 1.0), (3, 0.0), (4, 0.0), (5, 1.0)] + """ + + _java_class = "org.apache.spark.ml.feature.StringIndexer" + + @keyword_only + def __init__(self, inputCol=None, outputCol=None): + """ + __init__(self, inputCol=None, outputCol=None) + """ + super(StringIndexer, self).__init__() + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, inputCol=None, outputCol=None): + """ + setParams(self, inputCol=None, outputCol=None) + Sets params for this StringIndexer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + +class StringIndexerModel(JavaModel): + """ + Model fitted by StringIndexer. + """ + + +@inherit_doc +@ignore_unicode_prefix +class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + A tokenizer that converts the input string to lowercase and then + splits it by white spaces. + + >>> df = sqlContext.createDataFrame([("a b c",)], ["text"]) + >>> tokenizer = Tokenizer(inputCol="text", outputCol="words") + >>> tokenizer.transform(df).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> # Change a parameter. + >>> tokenizer.setParams(outputCol="tokens").transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Temporarily modify a parameter. + >>> tokenizer.transform(df, {tokenizer.outputCol: "words"}).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> tokenizer.transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Must use keyword arguments to specify params. + >>> tokenizer.setParams("text") + Traceback (most recent call last): + ... + TypeError: Method setParams forces keyword arguments. + """ + + _java_class = "org.apache.spark.ml.feature.Tokenizer" + + @keyword_only + def __init__(self, inputCol=None, outputCol=None): + """ + __init__(self, inputCol=None, outputCol=None) + """ + super(Tokenizer, self).__init__() + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, inputCol=None, outputCol=None): + """ + setParams(self, inputCol="input", outputCol="output") + Sets params for this Tokenizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + @inherit_doc class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): """ A feature transformer that merges multiple columns into a vector column. - >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(a=1, b=0, c=3)]).toDF() + >>> df = sqlContext.createDataFrame([(1, 0, 3)], ["a", "b", "c"]) >>> vecAssembler = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features") >>> vecAssembler.transform(df).head().features DenseVector([1.0, 0.0, 3.0]) @@ -137,7 +654,6 @@ def __init__(self, inputCols=None, outputCol=None): __init__(self, inputCols=None, outputCol=None) """ super(VectorAssembler, self).__init__() - self._setDefault() kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -151,10 +667,208 @@ def setParams(self, inputCols=None, outputCol=None): return self._set(**kwargs) +@inherit_doc +class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): + """ + Class for indexing categorical feature columns in a dataset of [[Vector]]. + + This has 2 usage modes: + - Automatically identify categorical features (default behavior) + - This helps process a dataset of unknown vectors into a dataset with some continuous + features and some categorical features. The choice between continuous and categorical + is based upon a maxCategories parameter. 
+ - Set maxCategories to the maximum number of categorical any categorical feature should + have. + - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}. + If maxCategories = 2, then feature 0 will be declared categorical and use indices {0, 1}, + and feature 1 will be declared continuous. + - Index all features, if all features are categorical + - If maxCategories is set to be very large, then this will build an index of unique + values for all features. + - Warning: This can cause problems if features are continuous since this will collect ALL + unique values to the driver. + - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}. + If maxCategories >= 3, then both features will be declared categorical. + + This returns a model which can transform categorical features to use 0-based indices. + + Index stability: + - This is not guaranteed to choose the same category index across multiple runs. + - If a categorical feature includes value 0, then this is guaranteed to map value 0 to + index 0. This maintains vector sparsity. + - More stability may be added in the future. + + TODO: Future extensions: The following functionality is planned for the future: + - Preserve metadata in transform; if a feature's metadata is already present, + do not recompute. + - Specify certain features to not index, either via a parameter or via existing metadata. + - Add warning if a categorical feature has only 1 category. + - Add option for allowing unknown categories. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([(Vectors.dense([-1.0, 0.0]),), + ... (Vectors.dense([0.0, 1.0]),), (Vectors.dense([0.0, 2.0]),)], ["a"]) + >>> indexer = VectorIndexer(maxCategories=2, inputCol="a", outputCol="indexed") + >>> model = indexer.fit(df) + >>> model.transform(df).head().indexed + DenseVector([1.0, 0.0]) + >>> indexer.setParams(outputCol="test").fit(df).transform(df).collect()[1].test + DenseVector([0.0, 1.0]) + >>> params = {indexer.maxCategories: 3, indexer.outputCol: "vector"} + >>> model2 = indexer.fit(df, params) + >>> model2.transform(df).head().vector + DenseVector([1.0, 0.0]) + """ + + _java_class = "org.apache.spark.ml.feature.VectorIndexer" + # a placeholder to make it appear in the generated doc + maxCategories = Param(Params._dummy(), "maxCategories", + "Threshold for the number of values a categorical feature can take " + + "(>= 2). If a feature is found to have > maxCategories values, then " + + "it is declared continuous.") + + @keyword_only + def __init__(self, maxCategories=20, inputCol=None, outputCol=None): + """ + __init__(self, maxCategories=20, inputCol=None, outputCol=None) + """ + super(VectorIndexer, self).__init__() + self.maxCategories = Param(self, "maxCategories", + "Threshold for the number of values a categorical feature " + + "can take (>= 2). If a feature is found to have " + + "> maxCategories values, then it is declared continuous.") + self._setDefault(maxCategories=20) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, maxCategories=20, inputCol=None, outputCol=None): + """ + setParams(self, maxCategories=20, inputCol=None, outputCol=None) + Sets params for this VectorIndexer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setMaxCategories(self, value): + """ + Sets the value of :py:attr:`maxCategories`. 
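The usage modes listed above all reduce to one threshold: a feature with at most maxCategories distinct values is treated as categorical, anything above that stays continuous. A toy sketch of that decision rule in plain Python (not the actual implementation, which lives in the Scala VectorIndexer):

    # Toy illustration of the maxCategories rule described in the docstring.
    def classify_features(columns, maxCategories):
        """columns: one list of raw values per feature."""
        kinds = []
        for values in columns:
            distinct = len(set(values))
            kinds.append("categorical" if distinct <= maxCategories else "continuous")
        return kinds

    # Feature 0 takes values {-1.0, 0.0}; feature 1 takes values {1.0, 3.0, 5.0}.
    cols = [[-1.0, 0.0, 0.0], [1.0, 3.0, 5.0]]
    print(classify_features(cols, maxCategories=2))  # ['categorical', 'continuous']
    print(classify_features(cols, maxCategories=3))  # ['categorical', 'categorical']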
+ """ + self.paramMap[self.maxCategories] = value + return self + + def getMaxCategories(self): + """ + Gets the value of maxCategories or its default value. + """ + return self.getOrDefault(self.maxCategories) + + +@inherit_doc +@ignore_unicode_prefix +class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol): + """ + Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further + natural language processing or machine learning process. + + >>> sent = ("a b " * 100 + "a c " * 10).split(" ") + >>> doc = sqlContext.createDataFrame([(sent,), (sent,)], ["sentence"]) + >>> model = Word2Vec(vectorSize=5, seed=42, inputCol="sentence", outputCol="model").fit(doc) + >>> model.transform(doc).head().model + DenseVector([-0.0422, -0.5138, -0.2546, 0.6885, 0.276]) + """ + + _java_class = "org.apache.spark.ml.feature.Word2Vec" + # a placeholder to make it appear in the generated doc + vectorSize = Param(Params._dummy(), "vectorSize", + "the dimension of codes after transforming from words") + numPartitions = Param(Params._dummy(), "numPartitions", + "number of partitions for sentences of words") + minCount = Param(Params._dummy(), "minCount", + "the minimum number of times a token must appear to be included in the " + + "word2vec model's vocabulary") + + @keyword_only + def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=42, inputCol=None, outputCol=None): + """ + __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=42, inputCol=None, outputCol=None) + """ + super(Word2Vec, self).__init__() + self.vectorSize = Param(self, "vectorSize", + "the dimension of codes after transforming from words") + self.numPartitions = Param(self, "numPartitions", + "number of partitions for sentences of words") + self.minCount = Param(self, "minCount", + "the minimum number of times a token must appear to be included " + + "in the word2vec model's vocabulary") + self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=42) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=42, inputCol=None, outputCol=None): + """ + setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, + inputCol=None, outputCol=None) + Sets params for this Word2Vec. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setVectorSize(self, value): + """ + Sets the value of :py:attr:`vectorSize`. + """ + self.paramMap[self.vectorSize] = value + return self + + def getVectorSize(self): + """ + Gets the value of vectorSize or its default value. + """ + return self.getOrDefault(self.vectorSize) + + def setNumPartitions(self, value): + """ + Sets the value of :py:attr:`numPartitions`. + """ + self.paramMap[self.numPartitions] = value + return self + + def getNumPartitions(self): + """ + Gets the value of numPartitions or its default value. + """ + return self.getOrDefault(self.numPartitions) + + def setMinCount(self, value): + """ + Sets the value of :py:attr:`minCount`. + """ + self.paramMap[self.minCount] = value + return self + + def getMinCount(self): + """ + Gets the value of minCount or its default value. + """ + return self.getOrDefault(self.minCount) + + +class Word2VecModel(JavaModel): + """ + Model fitted by Word2Vec. 
+ """ + + if __name__ == "__main__": import doctest from pyspark.context import SparkContext - from pyspark.sql import SQLContext + from pyspark.sql import Row, SQLContext globs = globals().copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: @@ -162,8 +876,11 @@ def setParams(self, inputCols=None, outputCol=None): sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - (failure_count, test_count) = doctest.testmod( - globs=globs, optionflags=doctest.ELLIPSIS) + testData = sc.parallelize([Row(id=0, label="a"), Row(id=1, label="b"), + Row(id=2, label="c"), Row(id=3, label="a"), + Row(id=4, label="a"), Row(id=5, label="c")], 2) + globs['stringIndDf'] = sqlContext.createDataFrame(testData) + (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) sc.stop() if failure_count: exit(-1) diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index c1c8e921dda87..ee901f25847bc 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -97,7 +97,9 @@ def get$Name(self): ("inputCol", "input column name", None), ("inputCols", "input column names", None), ("outputCol", "output column name", None), - ("numFeatures", "number of features", None)] + ("seed", "random seed", None), + ("tol", "the convergence tolerance for iterative algorithms", None), + ("stepSize", "Step size to be used for each iteration of optimization.", None)] code = [] for name, doc, defaultValueStr in shared: code.append(_gen_param_code(name, doc, defaultValueStr)) diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index aaf80f00085bf..5e7529c1dcf1e 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -308,3 +308,92 @@ def getNumFeatures(self): Gets the value of numFeatures or its default value. """ return self.getOrDefault(self.numFeatures) + + +class HasSeed(Params): + """ + Mixin for param seed: random seed. + """ + + # a placeholder to make it appear in the generated doc + seed = Param(Params._dummy(), "seed", "random seed") + + def __init__(self): + super(HasSeed, self).__init__() + #: param for random seed + self.seed = Param(self, "seed", "random seed") + if None is not None: + self._setDefault(seed=None) + + def setSeed(self, value): + """ + Sets the value of :py:attr:`seed`. + """ + self.paramMap[self.seed] = value + return self + + def getSeed(self): + """ + Gets the value of seed or its default value. + """ + return self.getOrDefault(self.seed) + + +class HasTol(Params): + """ + Mixin for param tol: the convergence tolerance for iterative algorithms. + """ + + # a placeholder to make it appear in the generated doc + tol = Param(Params._dummy(), "tol", "the convergence tolerance for iterative algorithms") + + def __init__(self): + super(HasTol, self).__init__() + #: param for the convergence tolerance for iterative algorithms + self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms") + if None is not None: + self._setDefault(tol=None) + + def setTol(self, value): + """ + Sets the value of :py:attr:`tol`. + """ + self.paramMap[self.tol] = value + return self + + def getTol(self): + """ + Gets the value of tol or its default value. + """ + return self.getOrDefault(self.tol) + + +class HasStepSize(Params): + """ + Mixin for param stepSize: Step size to be used for each iteration of optimization.. 
+ """ + + # a placeholder to make it appear in the generated doc + stepSize = Param(Params._dummy(), "stepSize", + "Step size to be used for each iteration of optimization.") + + def __init__(self): + super(HasStepSize, self).__init__() + #: param for Step size to be used for each iteration of optimization. + self.stepSize = Param(self, "stepSize", + "Step size to be used for each iteration of optimization.") + if None is not None: + self._setDefault(stepSize=None) + + def setStepSize(self, value): + """ + Sets the value of :py:attr:`stepSize`. + """ + self.paramMap[self.stepSize] = value + return self + + def getStepSize(self): + """ + Gets the value of stepSize or its default value. + """ + return self.getOrDefault(self.stepSize) From 65afd3ce8b8a0b00f4ea8294eac14b72e964872d Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Fri, 8 May 2015 11:16:04 -0700 Subject: [PATCH 036/320] [SPARK-7474] [MLLIB] update ParamGridBuilder doctest Multiline commands are properly handled in this PR. oefirouz ![screen shot 2015-05-07 at 10 53 25 pm](https://cloud.githubusercontent.com/assets/829644/7531290/02ad2fd4-f50c-11e4-8c04-e58d1a61ad69.png) Author: Xiangrui Meng <meng@databricks.com> Closes #6001 from mengxr/SPARK-7474 and squashes the following commits: b94b11d [Xiangrui Meng] update ParamGridBuilder doctest --- python/pyspark/ml/tuning.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 1e04c37fca53e..28e3727f2c064 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -27,24 +27,22 @@ class ParamGridBuilder(object): - """ + r""" Builder for a param grid used in grid search-based model selection. - >>> from classification import LogisticRegression + >>> from pyspark.ml.classification import LogisticRegression >>> lr = LogisticRegression() - >>> output = ParamGridBuilder().baseOn({lr.labelCol: 'l'}) \ - .baseOn([lr.predictionCol, 'p']) \ - .addGrid(lr.regParam, [1.0, 2.0, 3.0]) \ - .addGrid(lr.maxIter, [1, 5]) \ - .addGrid(lr.featuresCol, ['f']) \ - .build() - >>> expected = [ \ -{lr.regParam: 1.0, lr.featuresCol: 'f', lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 2.0, lr.featuresCol: 'f', lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 3.0, lr.featuresCol: 'f', lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 1.0, lr.featuresCol: 'f', lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 2.0, lr.featuresCol: 'f', lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 3.0, lr.featuresCol: 'f', lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}] + >>> output = ParamGridBuilder() \ + ... .baseOn({lr.labelCol: 'l'}) \ + ... .baseOn([lr.predictionCol, 'p']) \ + ... .addGrid(lr.regParam, [1.0, 2.0]) \ + ... .addGrid(lr.maxIter, [1, 5]) \ + ... .build() + >>> expected = [ + ... {lr.regParam: 1.0, lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, + ... {lr.regParam: 2.0, lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, + ... {lr.regParam: 1.0, lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}, + ... 
{lr.regParam: 2.0, lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}] >>> len(output) == len(expected) True >>> all([m in expected for m in output]) From 008a60dd371e76819d8e08ab638cac7b3a48c9fc Mon Sep 17 00:00:00 2001 From: hqzizania <qian.huang@intel.com> Date: Fri, 8 May 2015 11:25:04 -0700 Subject: [PATCH 037/320] [SPARK-6824] Fill the docs for DataFrame API in SparkR This patch also removes the RDD docs from being built as a part of roxygen just by the method to delete " ' '" of " \#' ". Author: hqzizania <qian.huang@intel.com> Author: qhuang <qian.huang@intel.com> Closes #5969 from hqzizania/R1 and squashes the following commits: 6d27696 [qhuang] fixes in NAMESPACE eb4b095 [qhuang] remove more docs 6394579 [qhuang] remove RDD docs in generics.R 6813860 [hqzizania] Fill the docs for DataFrame API in SparkR 857220f [hqzizania] remove the pairRDD docs from being built as a part of roxygen c045d64 [hqzizania] remove the RDD docs from being built as a part of roxygen --- R/pkg/DESCRIPTION | 2 +- R/pkg/NAMESPACE | 4 - R/pkg/R/DataFrame.R | 95 +-- R/pkg/R/RDD.R | 1546 +++++++++++++++++++++--------------------- R/pkg/R/SQLContext.R | 64 +- R/pkg/R/broadcast.R | 64 +- R/pkg/R/context.R | 240 +++---- R/pkg/R/generics.R | 318 ++++----- R/pkg/R/pairRDD.R | 886 ++++++++++++------------ 9 files changed, 1610 insertions(+), 1609 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 1c1779a763c7e..efc85bbc4b316 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -15,11 +15,11 @@ Suggests: Description: R frontend for Spark License: Apache License (== 2.0) Collate: + 'schema.R' 'generics.R' 'jobj.R' 'RDD.R' 'pairRDD.R' - 'schema.R' 'column.R' 'group.R' 'DataFrame.R' diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 3fb92be0940b7..7611f479a628b 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -26,7 +26,6 @@ exportMethods("cache", "intersect", "isLocal", "join", - "length", "limit", "orderBy", "names", @@ -101,9 +100,6 @@ export("cacheTable", "tables", "uncacheTable") -export("sparkRSQL.init", - "sparkRHive.init") - export("structField", "structField.jobj", "structField.character", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 47d92f141cc7d..354642e7bc307 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) { #' @rdname DataFrame #' @export +#' +#' @param sdf A Java object reference to the backing Scala DataFrame +#' @param isCached TRUE if the dataFrame is cached dataFrame <- function(sdf, isCached = FALSE) { new("DataFrame", sdf, isCached) } @@ -244,7 +247,7 @@ setMethod("columns", }) #' @rdname columns -#' @export +#' @aliases names,DataFrame,function-method setMethod("names", signature(x = "DataFrame"), function(x) { @@ -399,23 +402,23 @@ setMethod("repartition", dataFrame(sdf) }) -#' toJSON -#' -#' Convert the rows of a DataFrame into JSON objects and return an RDD where -#' each element contains a JSON string. -#' -#' @param x A SparkSQL DataFrame -#' @return A StringRRDD of JSON objects -#' @rdname tojson -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' newRDD <- toJSON(df) -#'} +# toJSON +# +# Convert the rows of a DataFrame into JSON objects and return an RDD where +# each element contains a JSON string. 
+# +#@param x A SparkSQL DataFrame +# @return A StringRRDD of JSON objects +# @rdname tojson +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlCtx <- sparkRSQL.init(sc) +# path <- "path/to/file.json" +# df <- jsonFile(sqlCtx, path) +# newRDD <- toJSON(df) +#} setMethod("toJSON", signature(x = "DataFrame"), function(x) { @@ -578,8 +581,8 @@ setMethod("limit", dataFrame(res) }) -# Take the first NUM rows of a DataFrame and return a the results as a data.frame - +#' Take the first NUM rows of a DataFrame and return a the results as a data.frame +#' #' @rdname take #' @export #' @examples @@ -644,22 +647,22 @@ setMethod("first", take(x, 1) }) -#' toRDD() -#' -#' Converts a Spark DataFrame to an RDD while preserving column names. -#' -#' @param x A Spark DataFrame -#' -#' @rdname DataFrame -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' rdd <- toRDD(df) -#' } +# toRDD() +# +# Converts a Spark DataFrame to an RDD while preserving column names. +# +# @param x A Spark DataFrame +# +# @rdname DataFrame +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlCtx <- sparkRSQL.init(sc) +# path <- "path/to/file.json" +# df <- jsonFile(sqlCtx, path) +# rdd <- toRDD(df) +# } setMethod("toRDD", signature(x = "DataFrame"), function(x) { @@ -706,6 +709,7 @@ setMethod("groupBy", #' #' Compute aggregates by specifying a list of columns #' +#' @param x a DataFrame #' @rdname DataFrame #' @export setMethod("agg", @@ -721,7 +725,7 @@ setMethod("agg", # the requested map function. # ################################################################################### -#' @rdname lapply +# @rdname lapply setMethod("lapply", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { @@ -729,14 +733,14 @@ setMethod("lapply", lapply(rdd, FUN) }) -#' @rdname lapply +# @rdname lapply setMethod("map", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { lapply(X, FUN) }) -#' @rdname flatMap +# @rdname flatMap setMethod("flatMap", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { @@ -744,7 +748,7 @@ setMethod("flatMap", flatMap(rdd, FUN) }) -#' @rdname lapplyPartition +# @rdname lapplyPartition setMethod("lapplyPartition", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { @@ -752,14 +756,14 @@ setMethod("lapplyPartition", lapplyPartition(rdd, FUN) }) -#' @rdname lapplyPartition +# @rdname lapplyPartition setMethod("mapPartitions", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { lapplyPartition(X, FUN) }) -#' @rdname foreach +# @rdname foreach setMethod("foreach", signature(x = "DataFrame", func = "function"), function(x, func) { @@ -767,7 +771,7 @@ setMethod("foreach", foreach(rdd, func) }) -#' @rdname foreach +# @rdname foreach setMethod("foreachPartition", signature(x = "DataFrame", func = "function"), function(x, func) { @@ -788,6 +792,7 @@ setMethod("$", signature(x = "DataFrame"), getColumn(x, name) }) +#' @rdname select setMethod("$<-", signature(x = "DataFrame"), function(x, name, value) { stopifnot(class(value) == "Column" || is.null(value)) @@ -1009,7 +1014,7 @@ setMethod("sortDF", }) #' @rdname sortDF -#' @export +#' @aliases orderBy,DataFrame,function-method setMethod("orderBy", signature(x = "DataFrame", col = "characterOrColumn"), function(x, col) { @@ -1046,7 +1051,7 @@ setMethod("filter", }) #' @rdname filter -#' @export +#' @aliases where,DataFrame,function-method setMethod("where", signature(x = 
"DataFrame", condition = "characterOrColumn"), function(x, condition) { diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index d1018c2361670..73999a6737032 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -19,16 +19,16 @@ setOldClass("jobj") -#' @title S4 class that represents an RDD -#' @description RDD can be created using functions like -#' \code{parallelize}, \code{textFile} etc. -#' @rdname RDD -#' @seealso parallelize, textFile -#' -#' @slot env An R environment that stores bookkeeping states of the RDD -#' @slot jrdd Java object reference to the backing JavaRDD -#' to an RDD -#' @export +# @title S4 class that represents an RDD +# @description RDD can be created using functions like +# \code{parallelize}, \code{textFile} etc. +# @rdname RDD +# @seealso parallelize, textFile +# +# @slot env An R environment that stores bookkeeping states of the RDD +# @slot jrdd Java object reference to the backing JavaRDD +# to an RDD +# @export setClass("RDD", slots = list(env = "environment", jrdd = "jobj")) @@ -108,14 +108,14 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) .Object }) -#' @rdname RDD -#' @export -#' -#' @param jrdd Java object reference to the backing JavaRDD -#' @param serializedMode Use "byte" if the RDD stores data serialized in R, "string" if the RDD -#' stores strings, and "row" if the RDD stores the rows of a DataFrame -#' @param isCached TRUE if the RDD is cached -#' @param isCheckpointed TRUE if the RDD has been checkpointed +# @rdname RDD +# @export +# +# @param jrdd Java object reference to the backing JavaRDD +# @param serializedMode Use "byte" if the RDD stores data serialized in R, "string" if the RDD +# stores strings, and "row" if the RDD stores the rows of a DataFrame +# @param isCached TRUE if the RDD is cached +# @param isCheckpointed TRUE if the RDD has been checkpointed RDD <- function(jrdd, serializedMode = "byte", isCached = FALSE, isCheckpointed = FALSE) { new("RDD", jrdd, serializedMode, isCached, isCheckpointed) @@ -200,19 +200,19 @@ setValidity("RDD", ############ Actions and Transformations ############ -#' Persist an RDD -#' -#' Persist this RDD with the default storage level (MEMORY_ONLY). -#' -#' @param x The RDD to cache -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' cache(rdd) -#'} -#' @rdname cache-methods -#' @aliases cache,RDD-method +# Persist an RDD +# +# Persist this RDD with the default storage level (MEMORY_ONLY). +# +# @param x The RDD to cache +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# cache(rdd) +#} +# @rdname cache-methods +# @aliases cache,RDD-method setMethod("cache", signature(x = "RDD"), function(x) { @@ -221,22 +221,22 @@ setMethod("cache", x }) -#' Persist an RDD -#' -#' Persist this RDD with the specified storage level. For details of the -#' supported storage levels, refer to -#' http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence. -#' -#' @param x The RDD to persist -#' @param newLevel The new storage level to be assigned -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' persist(rdd, "MEMORY_AND_DISK") -#'} -#' @rdname persist -#' @aliases persist,RDD-method +# Persist an RDD +# +# Persist this RDD with the specified storage level. For details of the +# supported storage levels, refer to +# http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence. 
+# +# @param x The RDD to persist +# @param newLevel The new storage level to be assigned +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# persist(rdd, "MEMORY_AND_DISK") +#} +# @rdname persist +# @aliases persist,RDD-method setMethod("persist", signature(x = "RDD", newLevel = "character"), function(x, newLevel) { @@ -245,21 +245,21 @@ setMethod("persist", x }) -#' Unpersist an RDD -#' -#' Mark the RDD as non-persistent, and remove all blocks for it from memory and -#' disk. -#' -#' @param x The RDD to unpersist -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' cache(rdd) # rdd@@env$isCached == TRUE -#' unpersist(rdd) # rdd@@env$isCached == FALSE -#'} -#' @rdname unpersist-methods -#' @aliases unpersist,RDD-method +# Unpersist an RDD +# +# Mark the RDD as non-persistent, and remove all blocks for it from memory and +# disk. +# +# @param x The RDD to unpersist +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# cache(rdd) # rdd@@env$isCached == TRUE +# unpersist(rdd) # rdd@@env$isCached == FALSE +#} +# @rdname unpersist-methods +# @aliases unpersist,RDD-method setMethod("unpersist", signature(x = "RDD"), function(x) { @@ -268,24 +268,24 @@ setMethod("unpersist", x }) -#' Checkpoint an RDD -#' -#' Mark this RDD for checkpointing. It will be saved to a file inside the -#' checkpoint directory set with setCheckpointDir() and all references to its -#' parent RDDs will be removed. This function must be called before any job has -#' been executed on this RDD. It is strongly recommended that this RDD is -#' persisted in memory, otherwise saving it on a file will require recomputation. -#' -#' @param x The RDD to checkpoint -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' setCheckpointDir(sc, "checkpoint") -#' rdd <- parallelize(sc, 1:10, 2L) -#' checkpoint(rdd) -#'} -#' @rdname checkpoint-methods -#' @aliases checkpoint,RDD-method +# Checkpoint an RDD +# +# Mark this RDD for checkpointing. It will be saved to a file inside the +# checkpoint directory set with setCheckpointDir() and all references to its +# parent RDDs will be removed. This function must be called before any job has +# been executed on this RDD. It is strongly recommended that this RDD is +# persisted in memory, otherwise saving it on a file will require recomputation. +# +# @param x The RDD to checkpoint +# @examples +#\dontrun{ +# sc <- sparkR.init() +# setCheckpointDir(sc, "checkpoint") +# rdd <- parallelize(sc, 1:10, 2L) +# checkpoint(rdd) +#} +# @rdname checkpoint-methods +# @aliases checkpoint,RDD-method setMethod("checkpoint", signature(x = "RDD"), function(x) { @@ -295,18 +295,18 @@ setMethod("checkpoint", x }) -#' Gets the number of partitions of an RDD -#' -#' @param x A RDD. -#' @return the number of partitions of rdd as an integer. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' numPartitions(rdd) # 2L -#'} -#' @rdname numPartitions -#' @aliases numPartitions,RDD-method +# Gets the number of partitions of an RDD +# +# @param x A RDD. +# @return the number of partitions of rdd as an integer. 
+# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# numPartitions(rdd) # 2L +#} +# @rdname numPartitions +# @aliases numPartitions,RDD-method setMethod("numPartitions", signature(x = "RDD"), function(x) { @@ -315,24 +315,24 @@ setMethod("numPartitions", callJMethod(partitions, "size") }) -#' Collect elements of an RDD -#' -#' @description -#' \code{collect} returns a list that contains all of the elements in this RDD. -#' -#' @param x The RDD to collect -#' @param ... Other optional arguments to collect -#' @param flatten FALSE if the list should not flattened -#' @return a list containing elements in the RDD -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' collect(rdd) # list from 1 to 10 -#' collectPartition(rdd, 0L) # list from 1 to 5 -#'} -#' @rdname collect-methods -#' @aliases collect,RDD-method +# Collect elements of an RDD +# +# @description +# \code{collect} returns a list that contains all of the elements in this RDD. +# +# @param x The RDD to collect +# @param ... Other optional arguments to collect +# @param flatten FALSE if the list should not flattened +# @return a list containing elements in the RDD +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# collect(rdd) # list from 1 to 10 +# collectPartition(rdd, 0L) # list from 1 to 5 +#} +# @rdname collect-methods +# @aliases collect,RDD-method setMethod("collect", signature(x = "RDD"), function(x, flatten = TRUE) { @@ -343,12 +343,12 @@ setMethod("collect", }) -#' @description -#' \code{collectPartition} returns a list that contains all of the elements -#' in the specified partition of the RDD. -#' @param partitionId the partition to collect (starts from 0) -#' @rdname collect-methods -#' @aliases collectPartition,integer,RDD-method +# @description +# \code{collectPartition} returns a list that contains all of the elements +# in the specified partition of the RDD. +# @param partitionId the partition to collect (starts from 0) +# @rdname collect-methods +# @aliases collectPartition,integer,RDD-method setMethod("collectPartition", signature(x = "RDD", partitionId = "integer"), function(x, partitionId) { @@ -361,17 +361,17 @@ setMethod("collectPartition", serializedMode = getSerializedMode(x)) }) -#' @description -#' \code{collectAsMap} returns a named list as a map that contains all of the elements -#' in a key-value pair RDD. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4)), 2L) -#' collectAsMap(rdd) # list(`1` = 2, `3` = 4) -#'} -#' @rdname collect-methods -#' @aliases collectAsMap,RDD-method +# @description +# \code{collectAsMap} returns a named list as a map that contains all of the elements +# in a key-value pair RDD. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 2), list(3, 4)), 2L) +# collectAsMap(rdd) # list(`1` = 2, `3` = 4) +#} +# @rdname collect-methods +# @aliases collectAsMap,RDD-method setMethod("collectAsMap", signature(x = "RDD"), function(x) { @@ -381,19 +381,19 @@ setMethod("collectAsMap", as.list(map) }) -#' Return the number of elements in the RDD. -#' -#' @param x The RDD to count -#' @return number of elements in the RDD. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' count(rdd) # 10 -#' length(rdd) # Same as count -#'} -#' @rdname count -#' @aliases count,RDD-method +# Return the number of elements in the RDD. 
+# +# @param x The RDD to count +# @return number of elements in the RDD. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# count(rdd) # 10 +# length(rdd) # Same as count +#} +# @rdname count +# @aliases count,RDD-method setMethod("count", signature(x = "RDD"), function(x) { @@ -405,31 +405,31 @@ setMethod("count", sum(as.integer(vals)) }) -#' Return the number of elements in the RDD -#' @export -#' @rdname count +# Return the number of elements in the RDD +# @export +# @rdname count setMethod("length", signature(x = "RDD"), function(x) { count(x) }) -#' Return the count of each unique value in this RDD as a list of -#' (value, count) pairs. -#' -#' Same as countByValue in Spark. -#' -#' @param x The RDD to count -#' @return list of (value, count) pairs, where count is number of each unique -#' value in rdd. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, c(1,2,3,2,1)) -#' countByValue(rdd) # (1,2L), (2,2L), (3,1L) -#'} -#' @rdname countByValue -#' @aliases countByValue,RDD-method +# Return the count of each unique value in this RDD as a list of +# (value, count) pairs. +# +# Same as countByValue in Spark. +# +# @param x The RDD to count +# @return list of (value, count) pairs, where count is number of each unique +# value in rdd. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, c(1,2,3,2,1)) +# countByValue(rdd) # (1,2L), (2,2L), (3,1L) +#} +# @rdname countByValue +# @aliases countByValue,RDD-method setMethod("countByValue", signature(x = "RDD"), function(x) { @@ -437,23 +437,23 @@ setMethod("countByValue", collect(reduceByKey(ones, `+`, numPartitions(x))) }) -#' Apply a function to all elements -#' -#' This function creates a new RDD by applying the given transformation to all -#' elements of the given RDD -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each element -#' @return a new RDD created by the transformation. -#' @rdname lapply -#' @aliases lapply -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' multiplyByTwo <- lapply(rdd, function(x) { x * 2 }) -#' collect(multiplyByTwo) # 2,4,6... -#'} +# Apply a function to all elements +# +# This function creates a new RDD by applying the given transformation to all +# elements of the given RDD +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each element +# @return a new RDD created by the transformation. +# @rdname lapply +# @aliases lapply +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# multiplyByTwo <- lapply(rdd, function(x) { x * 2 }) +# collect(multiplyByTwo) # 2,4,6... +#} setMethod("lapply", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -463,31 +463,31 @@ setMethod("lapply", lapplyPartitionsWithIndex(X, func) }) -#' @rdname lapply -#' @aliases map,RDD,function-method +# @rdname lapply +# @aliases map,RDD,function-method setMethod("map", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapply(X, FUN) }) -#' Flatten results after apply a function to all elements -#' -#' This function return a new RDD by first applying a function to all -#' elements of this RDD, and then flattening the results. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each element -#' @return a new RDD created by the transformation. 
-#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' multiplyByTwo <- flatMap(rdd, function(x) { list(x*2, x*10) }) -#' collect(multiplyByTwo) # 2,20,4,40,6,60... -#'} -#' @rdname flatMap -#' @aliases flatMap,RDD,function-method +# Flatten results after apply a function to all elements +# +# This function return a new RDD by first applying a function to all +# elements of this RDD, and then flattening the results. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each element +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# multiplyByTwo <- flatMap(rdd, function(x) { list(x*2, x*10) }) +# collect(multiplyByTwo) # 2,20,4,40,6,60... +#} +# @rdname flatMap +# @aliases flatMap,RDD,function-method setMethod("flatMap", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -500,83 +500,83 @@ setMethod("flatMap", lapplyPartition(X, partitionFunc) }) -#' Apply a function to each partition of an RDD -#' -#' Return a new RDD by applying a function to each partition of this RDD. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each partition. -#' @return a new RDD created by the transformation. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' partitionSum <- lapplyPartition(rdd, function(part) { Reduce("+", part) }) -#' collect(partitionSum) # 15, 40 -#'} -#' @rdname lapplyPartition -#' @aliases lapplyPartition,RDD,function-method +# Apply a function to each partition of an RDD +# +# Return a new RDD by applying a function to each partition of this RDD. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each partition. +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# partitionSum <- lapplyPartition(rdd, function(part) { Reduce("+", part) }) +# collect(partitionSum) # 15, 40 +#} +# @rdname lapplyPartition +# @aliases lapplyPartition,RDD,function-method setMethod("lapplyPartition", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapplyPartitionsWithIndex(X, function(s, part) { FUN(part) }) }) -#' mapPartitions is the same as lapplyPartition. -#' -#' @rdname lapplyPartition -#' @aliases mapPartitions,RDD,function-method +# mapPartitions is the same as lapplyPartition. +# +# @rdname lapplyPartition +# @aliases mapPartitions,RDD,function-method setMethod("mapPartitions", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapplyPartition(X, FUN) }) -#' Return a new RDD by applying a function to each partition of this RDD, while -#' tracking the index of the original partition. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each partition; takes the partition -#' index and a list of elements in the particular partition. -#' @return a new RDD created by the transformation. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 5L) -#' prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) { -#' partIndex * Reduce("+", part) }) -#' collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76 -#'} -#' @rdname lapplyPartitionsWithIndex -#' @aliases lapplyPartitionsWithIndex,RDD,function-method +# Return a new RDD by applying a function to each partition of this RDD, while +# tracking the index of the original partition. 
+# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each partition; takes the partition +# index and a list of elements in the particular partition. +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 5L) +# prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) { +# partIndex * Reduce("+", part) }) +# collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76 +#} +# @rdname lapplyPartitionsWithIndex +# @aliases lapplyPartitionsWithIndex,RDD,function-method setMethod("lapplyPartitionsWithIndex", signature(X = "RDD", FUN = "function"), function(X, FUN) { PipelinedRDD(X, FUN) }) -#' @rdname lapplyPartitionsWithIndex -#' @aliases mapPartitionsWithIndex,RDD,function-method +# @rdname lapplyPartitionsWithIndex +# @aliases mapPartitionsWithIndex,RDD,function-method setMethod("mapPartitionsWithIndex", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapplyPartitionsWithIndex(X, FUN) }) -#' This function returns a new RDD containing only the elements that satisfy -#' a predicate (i.e. returning TRUE in a given logical function). -#' The same as `filter()' in Spark. -#' -#' @param x The RDD to be filtered. -#' @param f A unary predicate function. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2) -#'} -#' @rdname filterRDD -#' @aliases filterRDD,RDD,function-method +# This function returns a new RDD containing only the elements that satisfy +# a predicate (i.e. returning TRUE in a given logical function). +# The same as `filter()' in Spark. +# +# @param x The RDD to be filtered. +# @param f A unary predicate function. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2) +#} +# @rdname filterRDD +# @aliases filterRDD,RDD,function-method setMethod("filterRDD", signature(x = "RDD", f = "function"), function(x, f) { @@ -586,30 +586,30 @@ setMethod("filterRDD", lapplyPartition(x, filter.func) }) -#' @rdname filterRDD -#' @aliases Filter +# @rdname filterRDD +# @aliases Filter setMethod("Filter", signature(f = "function", x = "RDD"), function(f, x) { filterRDD(x, f) }) -#' Reduce across elements of an RDD. -#' -#' This function reduces the elements of this RDD using the -#' specified commutative and associative binary operator. -#' -#' @param x The RDD to reduce -#' @param func Commutative and associative function to apply on elements -#' of the RDD. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' reduce(rdd, "+") # 55 -#'} -#' @rdname reduce -#' @aliases reduce,RDD,ANY-method +# Reduce across elements of an RDD. +# +# This function reduces the elements of this RDD using the +# specified commutative and associative binary operator. +# +# @param x The RDD to reduce +# @param func Commutative and associative function to apply on elements +# of the RDD. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# reduce(rdd, "+") # 55 +#} +# @rdname reduce +# @aliases reduce,RDD,ANY-method setMethod("reduce", signature(x = "RDD", func = "ANY"), function(x, func) { @@ -623,70 +623,70 @@ setMethod("reduce", Reduce(func, partitionList) }) -#' Get the maximum element of an RDD. 
-#' -#' @param x The RDD to get the maximum element from -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' maximum(rdd) # 10 -#'} -#' @rdname maximum -#' @aliases maximum,RDD +# Get the maximum element of an RDD. +# +# @param x The RDD to get the maximum element from +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# maximum(rdd) # 10 +#} +# @rdname maximum +# @aliases maximum,RDD setMethod("maximum", signature(x = "RDD"), function(x) { reduce(x, max) }) -#' Get the minimum element of an RDD. -#' -#' @param x The RDD to get the minimum element from -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' minimum(rdd) # 1 -#'} -#' @rdname minimum -#' @aliases minimum,RDD +# Get the minimum element of an RDD. +# +# @param x The RDD to get the minimum element from +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# minimum(rdd) # 1 +#} +# @rdname minimum +# @aliases minimum,RDD setMethod("minimum", signature(x = "RDD"), function(x) { reduce(x, min) }) -#' Add up the elements in an RDD. -#' -#' @param x The RDD to add up the elements in -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' sumRDD(rdd) # 55 -#'} -#' @rdname sumRDD -#' @aliases sumRDD,RDD +# Add up the elements in an RDD. +# +# @param x The RDD to add up the elements in +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# sumRDD(rdd) # 55 +#} +# @rdname sumRDD +# @aliases sumRDD,RDD setMethod("sumRDD", signature(x = "RDD"), function(x) { reduce(x, "+") }) -#' Applies a function to all elements in an RDD, and force evaluation. -#' -#' @param x The RDD to apply the function -#' @param func The function to be applied. -#' @return invisible NULL. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' foreach(rdd, function(x) { save(x, file=...) }) -#'} -#' @rdname foreach -#' @aliases foreach,RDD,function-method +# Applies a function to all elements in an RDD, and force evaluation. +# +# @param x The RDD to apply the function +# @param func The function to be applied. +# @return invisible NULL. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# foreach(rdd, function(x) { save(x, file=...) }) +#} +# @rdname foreach +# @aliases foreach,RDD,function-method setMethod("foreach", signature(x = "RDD", func = "function"), function(x, func) { @@ -697,37 +697,37 @@ setMethod("foreach", invisible(collect(mapPartitions(x, partition.func))) }) -#' Applies a function to each partition in an RDD, and force evaluation. -#' -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' foreachPartition(rdd, function(part) { save(part, file=...); NULL }) -#'} -#' @rdname foreach -#' @aliases foreachPartition,RDD,function-method +# Applies a function to each partition in an RDD, and force evaluation. +# +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# foreachPartition(rdd, function(part) { save(part, file=...); NULL }) +#} +# @rdname foreach +# @aliases foreachPartition,RDD,function-method setMethod("foreachPartition", signature(x = "RDD", func = "function"), function(x, func) { invisible(collect(mapPartitions(x, func))) }) -#' Take elements from an RDD. -#' -#' This function takes the first NUM elements in the RDD and -#' returns them in a list. 
-#' -#' @param x The RDD to take elements from -#' @param num Number of elements to take -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' take(rdd, 2L) # list(1, 2) -#'} -#' @rdname take -#' @aliases take,RDD,numeric-method +# Take elements from an RDD. +# +# This function takes the first NUM elements in the RDD and +# returns them in a list. +# +# @param x The RDD to take elements from +# @param num Number of elements to take +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# take(rdd, 2L) # list(1, 2) +#} +# @rdname take +# @aliases take,RDD,numeric-method setMethod("take", signature(x = "RDD", num = "numeric"), function(x, num) { @@ -762,39 +762,39 @@ setMethod("take", }) -#' First -#' -#' Return the first element of an RDD -#' -#' @rdname first -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' first(rdd) -#' } +# First +# +# Return the first element of an RDD +# +# @rdname first +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# first(rdd) +# } setMethod("first", signature(x = "RDD"), function(x) { take(x, 1)[[1]] }) -#' Removes the duplicates from RDD. -#' -#' This function returns a new RDD containing the distinct elements in the -#' given RDD. The same as `distinct()' in Spark. -#' -#' @param x The RDD to remove duplicates from. -#' @param numPartitions Number of partitions to create. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, c(1,2,2,3,3,3)) -#' sort(unlist(collect(distinct(rdd)))) # c(1, 2, 3) -#'} -#' @rdname distinct -#' @aliases distinct,RDD-method +# Removes the duplicates from RDD. +# +# This function returns a new RDD containing the distinct elements in the +# given RDD. The same as `distinct()' in Spark. +# +# @param x The RDD to remove duplicates from. +# @param numPartitions Number of partitions to create. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, c(1,2,2,3,3,3)) +# sort(unlist(collect(distinct(rdd)))) # c(1, 2, 3) +#} +# @rdname distinct +# @aliases distinct,RDD-method setMethod("distinct", signature(x = "RDD"), function(x, numPartitions = SparkR:::numPartitions(x)) { @@ -806,24 +806,24 @@ setMethod("distinct", resRDD }) -#' Return an RDD that is a sampled subset of the given RDD. -#' -#' The same as `sample()' in Spark. (We rename it due to signature -#' inconsistencies with the `sample()' function in R's base package.) -#' -#' @param x The RDD to sample elements from -#' @param withReplacement Sampling with replacement or not -#' @param fraction The (rough) sample target fraction -#' @param seed Randomness seed value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements -#' collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates -#'} -#' @rdname sampleRDD -#' @aliases sampleRDD,RDD +# Return an RDD that is a sampled subset of the given RDD. +# +# The same as `sample()' in Spark. (We rename it due to signature +# inconsistencies with the `sample()' function in R's base package.) 
+# +# @param x The RDD to sample elements from +# @param withReplacement Sampling with replacement or not +# @param fraction The (rough) sample target fraction +# @param seed Randomness seed value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements +# collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates +#} +# @rdname sampleRDD +# @aliases sampleRDD,RDD setMethod("sampleRDD", signature(x = "RDD", withReplacement = "logical", fraction = "numeric", seed = "integer"), @@ -867,23 +867,23 @@ setMethod("sampleRDD", lapplyPartitionsWithIndex(x, samplingFunc) }) -#' Return a list of the elements that are a sampled subset of the given RDD. -#' -#' @param x The RDD to sample elements from -#' @param withReplacement Sampling with replacement or not -#' @param num Number of elements to return -#' @param seed Randomness seed value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:100) -#' # exactly 5 elements sampled, which may not be distinct -#' takeSample(rdd, TRUE, 5L, 1618L) -#' # exactly 5 distinct elements sampled -#' takeSample(rdd, FALSE, 5L, 16181618L) -#'} -#' @rdname takeSample -#' @aliases takeSample,RDD +# Return a list of the elements that are a sampled subset of the given RDD. +# +# @param x The RDD to sample elements from +# @param withReplacement Sampling with replacement or not +# @param num Number of elements to return +# @param seed Randomness seed value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:100) +# # exactly 5 elements sampled, which may not be distinct +# takeSample(rdd, TRUE, 5L, 1618L) +# # exactly 5 distinct elements sampled +# takeSample(rdd, FALSE, 5L, 16181618L) +#} +# @rdname takeSample +# @aliases takeSample,RDD setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", num = "integer", seed = "integer"), function(x, withReplacement, num, seed) { @@ -930,18 +930,18 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", sample(samples)[1:total] }) -#' Creates tuples of the elements in this RDD by applying a function. -#' -#' @param x The RDD. -#' @param func The function to be applied. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3)) -#' collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3)) -#'} -#' @rdname keyBy -#' @aliases keyBy,RDD +# Creates tuples of the elements in this RDD by applying a function. +# +# @param x The RDD. +# @param func The function to be applied. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3)) +# collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3)) +#} +# @rdname keyBy +# @aliases keyBy,RDD setMethod("keyBy", signature(x = "RDD", func = "function"), function(x, func) { @@ -951,44 +951,44 @@ setMethod("keyBy", lapply(x, apply.func) }) -#' Return a new RDD that has exactly numPartitions partitions. -#' Can increase or decrease the level of parallelism in this RDD. Internally, -#' this uses a shuffle to redistribute data. -#' If you are decreasing the number of partitions in this RDD, consider using -#' coalesce, which can avoid performing a shuffle. -#' -#' @param x The RDD. -#' @param numPartitions Number of partitions to create. 
-#' @seealso coalesce -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L) -#' numPartitions(rdd) # 4 -#' numPartitions(repartition(rdd, 2L)) # 2 -#'} -#' @rdname repartition -#' @aliases repartition,RDD +# Return a new RDD that has exactly numPartitions partitions. +# Can increase or decrease the level of parallelism in this RDD. Internally, +# this uses a shuffle to redistribute data. +# If you are decreasing the number of partitions in this RDD, consider using +# coalesce, which can avoid performing a shuffle. +# +# @param x The RDD. +# @param numPartitions Number of partitions to create. +# @seealso coalesce +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L) +# numPartitions(rdd) # 4 +# numPartitions(repartition(rdd, 2L)) # 2 +#} +# @rdname repartition +# @aliases repartition,RDD setMethod("repartition", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions) { coalesce(x, numPartitions, TRUE) }) -#' Return a new RDD that is reduced into numPartitions partitions. -#' -#' @param x The RDD. -#' @param numPartitions Number of partitions to create. -#' @seealso repartition -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5), 3L) -#' numPartitions(rdd) # 3 -#' numPartitions(coalesce(rdd, 1L)) # 1 -#'} -#' @rdname coalesce -#' @aliases coalesce,RDD +# Return a new RDD that is reduced into numPartitions partitions. +# +# @param x The RDD. +# @param numPartitions Number of partitions to create. +# @seealso repartition +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4, 5), 3L) +# numPartitions(rdd) # 3 +# numPartitions(coalesce(rdd, 1L)) # 1 +#} +# @rdname coalesce +# @aliases coalesce,RDD setMethod("coalesce", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions, shuffle = FALSE) { @@ -1012,19 +1012,19 @@ setMethod("coalesce", } }) -#' Save this RDD as a SequenceFile of serialized objects. -#' -#' @param x The RDD to save -#' @param path The directory where the file is saved -#' @seealso objectFile -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3) -#' saveAsObjectFile(rdd, "/tmp/sparkR-tmp") -#'} -#' @rdname saveAsObjectFile -#' @aliases saveAsObjectFile,RDD +# Save this RDD as a SequenceFile of serialized objects. +# +# @param x The RDD to save +# @param path The directory where the file is saved +# @seealso objectFile +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3) +# saveAsObjectFile(rdd, "/tmp/sparkR-tmp") +#} +# @rdname saveAsObjectFile +# @aliases saveAsObjectFile,RDD setMethod("saveAsObjectFile", signature(x = "RDD", path = "character"), function(x, path) { @@ -1037,18 +1037,18 @@ setMethod("saveAsObjectFile", invisible(callJMethod(getJRDD(x), "saveAsObjectFile", path)) }) -#' Save this RDD as a text file, using string representations of elements. -#' -#' @param x The RDD to save -#' @param path The directory where the partitions of the text file are saved -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3) -#' saveAsTextFile(rdd, "/tmp/sparkR-tmp") -#'} -#' @rdname saveAsTextFile -#' @aliases saveAsTextFile,RDD +# Save this RDD as a text file, using string representations of elements. 
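A minimal sketch of the save/load round trip these writers support, assuming a local SparkR session and writable scratch paths (the /tmp locations below are only placeholders):

sc <- sparkR.init()
rdd <- parallelize(sc, 1:3)
saveAsObjectFile(rdd, "/tmp/sparkR-obj")      # serialized R objects; read back with objectFile
saveAsTextFile(rdd, "/tmp/sparkR-txt")        # one string representation per element
reloaded <- objectFile(sc, "/tmp/sparkR-obj")
collect(reloaded)                             # should give back list(1, 2, 3)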
+# +# @param x The RDD to save +# @param path The directory where the partitions of the text file are saved +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3) +# saveAsTextFile(rdd, "/tmp/sparkR-tmp") +#} +# @rdname saveAsTextFile +# @aliases saveAsTextFile,RDD setMethod("saveAsTextFile", signature(x = "RDD", path = "character"), function(x, path) { @@ -1061,21 +1061,21 @@ setMethod("saveAsTextFile", callJMethod(getJRDD(stringRdd, serializedMode = "string"), "saveAsTextFile", path)) }) -#' Sort an RDD by the given key function. -#' -#' @param x An RDD to be sorted. -#' @param func A function used to compute the sort key for each element. -#' @param ascending A flag to indicate whether the sorting is ascending or descending. -#' @param numPartitions Number of partitions to create. -#' @return An RDD where all elements are sorted. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(3, 2, 1)) -#' collect(sortBy(rdd, function(x) { x })) # list (1, 2, 3) -#'} -#' @rdname sortBy -#' @aliases sortBy,RDD,RDD-method +# Sort an RDD by the given key function. +# +# @param x An RDD to be sorted. +# @param func A function used to compute the sort key for each element. +# @param ascending A flag to indicate whether the sorting is ascending or descending. +# @param numPartitions Number of partitions to create. +# @return An RDD where all elements are sorted. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(3, 2, 1)) +# collect(sortBy(rdd, function(x) { x })) # list (1, 2, 3) +#} +# @rdname sortBy +# @aliases sortBy,RDD,RDD-method setMethod("sortBy", signature(x = "RDD", func = "function"), function(x, func, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) { @@ -1137,97 +1137,97 @@ takeOrderedElem <- function(x, num, ascending = TRUE) { resList } -#' Returns the first N elements from an RDD in ascending order. -#' -#' @param x An RDD. -#' @param num Number of elements to return. -#' @return The first N elements from the RDD in ascending order. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) -#' takeOrdered(rdd, 6L) # list(1, 2, 3, 4, 5, 6) -#'} -#' @rdname takeOrdered -#' @aliases takeOrdered,RDD,RDD-method +# Returns the first N elements from an RDD in ascending order. +# +# @param x An RDD. +# @param num Number of elements to return. +# @return The first N elements from the RDD in ascending order. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) +# takeOrdered(rdd, 6L) # list(1, 2, 3, 4, 5, 6) +#} +# @rdname takeOrdered +# @aliases takeOrdered,RDD,RDD-method setMethod("takeOrdered", signature(x = "RDD", num = "integer"), function(x, num) { takeOrderedElem(x, num) }) -#' Returns the top N elements from an RDD. -#' -#' @param x An RDD. -#' @param num Number of elements to return. -#' @return The top N elements from the RDD. -#' @rdname top -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) -#' top(rdd, 6L) # list(10, 9, 7, 6, 5, 4) -#'} -#' @rdname top -#' @aliases top,RDD,RDD-method +# Returns the top N elements from an RDD. +# +# @param x An RDD. +# @param num Number of elements to return. +# @return The top N elements from the RDD. 
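The two ordered-retrieval actions are easiest to compare on the same input; a short sketch based on the examples documented here:

sc <- sparkR.init()
rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7))
takeOrdered(rdd, 6L)   # smallest six, ascending: list(1, 2, 3, 4, 5, 6)
top(rdd, 6L)           # largest six, descending: list(10, 9, 7, 6, 5, 4)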
+# @rdname top +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) +# top(rdd, 6L) # list(10, 9, 7, 6, 5, 4) +#} +# @rdname top +# @aliases top,RDD,RDD-method setMethod("top", signature(x = "RDD", num = "integer"), function(x, num) { takeOrderedElem(x, num, FALSE) }) -#' Fold an RDD using a given associative function and a neutral "zero value". -#' -#' Aggregate the elements of each partition, and then the results for all the -#' partitions, using a given associative function and a neutral "zero value". -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param op An associative function for the folding operation. -#' @return The folding result. -#' @rdname fold -#' @seealso reduce -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5)) -#' fold(rdd, 0, "+") # 15 -#'} -#' @rdname fold -#' @aliases fold,RDD,RDD-method +# Fold an RDD using a given associative function and a neutral "zero value". +# +# Aggregate the elements of each partition, and then the results for all the +# partitions, using a given associative function and a neutral "zero value". +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param op An associative function for the folding operation. +# @return The folding result. +# @rdname fold +# @seealso reduce +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4, 5)) +# fold(rdd, 0, "+") # 15 +#} +# @rdname fold +# @aliases fold,RDD,RDD-method setMethod("fold", signature(x = "RDD", zeroValue = "ANY", op = "ANY"), function(x, zeroValue, op) { aggregateRDD(x, zeroValue, op, op) }) -#' Aggregate an RDD using the given combine functions and a neutral "zero value". -#' -#' Aggregate the elements of each partition, and then the results for all the -#' partitions, using given combine functions and a neutral "zero value". -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param seqOp A function to aggregate the RDD elements. It may return a different -#' result type from the type of the RDD elements. -#' @param combOp A function to aggregate results of seqOp. -#' @return The aggregation result. -#' @rdname aggregateRDD -#' @seealso reduce -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4)) -#' zeroValue <- list(0, 0) -#' seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } -#' combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } -#' aggregateRDD(rdd, zeroValue, seqOp, combOp) # list(10, 4) -#'} -#' @rdname aggregateRDD -#' @aliases aggregateRDD,RDD,RDD-method +# Aggregate an RDD using the given combine functions and a neutral "zero value". +# +# Aggregate the elements of each partition, and then the results for all the +# partitions, using given combine functions and a neutral "zero value". +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param seqOp A function to aggregate the RDD elements. It may return a different +# result type from the type of the RDD elements. +# @param combOp A function to aggregate results of seqOp. +# @return The aggregation result. 
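The documented aggregateRDD example computes a (sum, count) pair; extending that sketch one step gives a distributed mean (the final division is local and purely illustrative):

sc <- sparkR.init()
rdd <- parallelize(sc, list(1, 2, 3, 4))
zeroValue <- list(0, 0)
seqOp  <- function(acc, e) { list(acc[[1]] + e, acc[[2]] + 1) }       # fold one element into (sum, count)
combOp <- function(a, b) { list(a[[1]] + b[[1]], a[[2]] + b[[2]]) }   # merge per-partition results
sumCount <- aggregateRDD(rdd, zeroValue, seqOp, combOp)               # list(10, 4)
sumCount[[1]] / sumCount[[2]]                                         # 2.5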
+# @rdname aggregateRDD +# @seealso reduce +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4)) +# zeroValue <- list(0, 0) +# seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } +# combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } +# aggregateRDD(rdd, zeroValue, seqOp, combOp) # list(10, 4) +#} +# @rdname aggregateRDD +# @aliases aggregateRDD,RDD,RDD-method setMethod("aggregateRDD", signature(x = "RDD", zeroValue = "ANY", seqOp = "ANY", combOp = "ANY"), function(x, zeroValue, seqOp, combOp) { @@ -1240,25 +1240,25 @@ setMethod("aggregateRDD", Reduce(combOp, partitionList, zeroValue) }) -#' Pipes elements to a forked external process. -#' -#' The same as 'pipe()' in Spark. -#' -#' @param x The RDD whose elements are piped to the forked external process. -#' @param command The command to fork an external process. -#' @param env A named list to set environment variables of the external process. -#' @return A new RDD created by piping all elements to a forked external process. -#' @rdname pipeRDD -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' collect(pipeRDD(rdd, "more") -#' Output: c("1", "2", ..., "10") -#'} -#' @rdname pipeRDD -#' @aliases pipeRDD,RDD,character-method +# Pipes elements to a forked external process. +# +# The same as 'pipe()' in Spark. +# +# @param x The RDD whose elements are piped to the forked external process. +# @param command The command to fork an external process. +# @param env A named list to set environment variables of the external process. +# @return A new RDD created by piping all elements to a forked external process. +# @rdname pipeRDD +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# collect(pipeRDD(rdd, "more") +# Output: c("1", "2", ..., "10") +#} +# @rdname pipeRDD +# @aliases pipeRDD,RDD,character-method setMethod("pipeRDD", signature(x = "RDD", command = "character"), function(x, command, env = list()) { @@ -1274,41 +1274,41 @@ setMethod("pipeRDD", }) # TODO: Consider caching the name in the RDD's environment -#' Return an RDD's name. -#' -#' @param x The RDD whose name is returned. -#' @rdname name -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1,2,3)) -#' name(rdd) # NULL (if not set before) -#'} -#' @rdname name -#' @aliases name,RDD +# Return an RDD's name. +# +# @param x The RDD whose name is returned. +# @rdname name +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1,2,3)) +# name(rdd) # NULL (if not set before) +#} +# @rdname name +# @aliases name,RDD setMethod("name", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "name") }) -#' Set an RDD's name. -#' -#' @param x The RDD whose name is to be set. -#' @param name The RDD name to be set. -#' @return a new RDD renamed. -#' @rdname setName -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1,2,3)) -#' setName(rdd, "myRDD") -#' name(rdd) # "myRDD" -#'} -#' @rdname setName -#' @aliases setName,RDD +# Set an RDD's name. +# +# @param x The RDD whose name is to be set. +# @param name The RDD name to be set. +# @return a new RDD renamed. 
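A short sketch tying together the piping and naming helpers documented here; "cat" stands in for any external command assumed to be installed on every worker, and "pipedRDD" is an arbitrary label chosen for illustration:

sc <- sparkR.init()
rdd <- parallelize(sc, 1:10)
piped <- pipeRDD(rdd, "cat")          # each element is written to the process; its stdout becomes the new RDD
collect(piped)                        # roughly c("1", "2", ..., "10"), now as strings
named <- setName(piped, "pipedRDD")   # setName returns the renamed RDD
name(named)                           # "pipedRDD"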
+# @rdname setName +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1,2,3)) +# setName(rdd, "myRDD") +# name(rdd) # "myRDD" +#} +# @rdname setName +# @aliases setName,RDD setMethod("setName", signature(x = "RDD", name = "character"), function(x, name) { @@ -1316,25 +1316,25 @@ setMethod("setName", x }) -#' Zip an RDD with generated unique Long IDs. -#' -#' Items in the kth partition will get ids k, n+k, 2*n+k, ..., where -#' n is the number of partitions. So there may exist gaps, but this -#' method won't trigger a spark job, which is different from -#' zipWithIndex. -#' -#' @param x An RDD to be zipped. -#' @return An RDD with zipped items. -#' @seealso zipWithIndex -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) -#' collect(zipWithUniqueId(rdd)) -#' # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2)) -#'} -#' @rdname zipWithUniqueId -#' @aliases zipWithUniqueId,RDD +# Zip an RDD with generated unique Long IDs. +# +# Items in the kth partition will get ids k, n+k, 2*n+k, ..., where +# n is the number of partitions. So there may exist gaps, but this +# method won't trigger a spark job, which is different from +# zipWithIndex. +# +# @param x An RDD to be zipped. +# @return An RDD with zipped items. +# @seealso zipWithIndex +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) +# collect(zipWithUniqueId(rdd)) +# # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2)) +#} +# @rdname zipWithUniqueId +# @aliases zipWithUniqueId,RDD setMethod("zipWithUniqueId", signature(x = "RDD"), function(x) { @@ -1353,28 +1353,28 @@ setMethod("zipWithUniqueId", lapplyPartitionsWithIndex(x, partitionFunc) }) -#' Zip an RDD with its element indices. -#' -#' The ordering is first based on the partition index and then the -#' ordering of items within each partition. So the first item in -#' the first partition gets index 0, and the last item in the last -#' partition receives the largest index. -#' -#' This method needs to trigger a Spark job when this RDD contains -#' more than one partition. -#' -#' @param x An RDD to be zipped. -#' @return An RDD with zipped items. -#' @seealso zipWithUniqueId -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) -#' collect(zipWithIndex(rdd)) -#' # list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4)) -#'} -#' @rdname zipWithIndex -#' @aliases zipWithIndex,RDD +# Zip an RDD with its element indices. +# +# The ordering is first based on the partition index and then the +# ordering of items within each partition. So the first item in +# the first partition gets index 0, and the last item in the last +# partition receives the largest index. +# +# This method needs to trigger a Spark job when this RDD contains +# more than one partition. +# +# @param x An RDD to be zipped. +# @return An RDD with zipped items. 
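The two index-assigning transformations are easiest to contrast on the same 3-partition input used in the examples here; only zipWithIndex may trigger a Spark job:

sc <- sparkR.init()
rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)
collect(zipWithUniqueId(rdd))  # ids k, n+k, 2*n+k per partition; gaps allowed, no job triggered
# list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2))
collect(zipWithIndex(rdd))     # contiguous indices 0..4; needs a job when there is more than one partition
# list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4))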
+# @seealso zipWithUniqueId +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) +# collect(zipWithIndex(rdd)) +# # list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4)) +#} +# @rdname zipWithIndex +# @aliases zipWithIndex,RDD setMethod("zipWithIndex", signature(x = "RDD"), function(x) { @@ -1406,20 +1406,20 @@ setMethod("zipWithIndex", lapplyPartitionsWithIndex(x, partitionFunc) }) -#' Coalesce all elements within each partition of an RDD into a list. -#' -#' @param x An RDD. -#' @return An RDD created by coalescing all elements within -#' each partition into a list. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, as.list(1:4), 2L) -#' collect(glom(rdd)) -#' # list(list(1, 2), list(3, 4)) -#'} -#' @rdname glom -#' @aliases glom,RDD +# Coalesce all elements within each partition of an RDD into a list. +# +# @param x An RDD. +# @return An RDD created by coalescing all elements within +# each partition into a list. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, as.list(1:4), 2L) +# collect(glom(rdd)) +# # list(list(1, 2), list(3, 4)) +#} +# @rdname glom +# @aliases glom,RDD setMethod("glom", signature(x = "RDD"), function(x) { @@ -1432,21 +1432,21 @@ setMethod("glom", ############ Binary Functions ############# -#' Return the union RDD of two RDDs. -#' The same as union() in Spark. -#' -#' @param x An RDD. -#' @param y An RDD. -#' @return a new RDD created by performing the simple union (witout removing -#' duplicates) of two input RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3) -#' unionRDD(rdd, rdd) # 1, 2, 3, 1, 2, 3 -#'} -#' @rdname unionRDD -#' @aliases unionRDD,RDD,RDD-method +# Return the union RDD of two RDDs. +# The same as union() in Spark. +# +# @param x An RDD. +# @param y An RDD. +# @return a new RDD created by performing the simple union (witout removing +# duplicates) of two input RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3) +# unionRDD(rdd, rdd) # 1, 2, 3, 1, 2, 3 +#} +# @rdname unionRDD +# @aliases unionRDD,RDD,RDD-method setMethod("unionRDD", signature(x = "RDD", y = "RDD"), function(x, y) { @@ -1463,27 +1463,27 @@ setMethod("unionRDD", union.rdd }) -#' Zip an RDD with another RDD. -#' -#' Zips this RDD with another one, returning key-value pairs with the -#' first element in each RDD second element in each RDD, etc. Assumes -#' that the two RDDs have the same number of partitions and the same -#' number of elements in each partition (e.g. one was made through -#' a map on the other). -#' -#' @param x An RDD to be zipped. -#' @param other Another RDD to be zipped. -#' @return An RDD zipped from the two RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, 0:4) -#' rdd2 <- parallelize(sc, 1000:1004) -#' collect(zipRDD(rdd1, rdd2)) -#' # list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004)) -#'} -#' @rdname zipRDD -#' @aliases zipRDD,RDD +# Zip an RDD with another RDD. +# +# Zips this RDD with another one, returning key-value pairs with the +# first element in each RDD second element in each RDD, etc. Assumes +# that the two RDDs have the same number of partitions and the same +# number of elements in each partition (e.g. one was made through +# a map on the other). +# +# @param x An RDD to be zipped. +# @param other Another RDD to be zipped. +# @return An RDD zipped from the two RDDs. 
+# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, 0:4) +# rdd2 <- parallelize(sc, 1000:1004) +# collect(zipRDD(rdd1, rdd2)) +# # list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004)) +#} +# @rdname zipRDD +# @aliases zipRDD,RDD setMethod("zipRDD", signature(x = "RDD", other = "RDD"), function(x, other) { @@ -1502,24 +1502,24 @@ setMethod("zipRDD", mergePartitions(rdd, TRUE) }) -#' Cartesian product of this RDD and another one. -#' -#' Return the Cartesian product of this RDD and another one, -#' that is, the RDD of all pairs of elements (a, b) where a -#' is in this and b is in other. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @return A new RDD which is the Cartesian product of these two RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:2) -#' sortByKey(cartesian(rdd, rdd)) -#' # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2)) -#'} -#' @rdname cartesian -#' @aliases cartesian,RDD,RDD-method +# Cartesian product of this RDD and another one. +# +# Return the Cartesian product of this RDD and another one, +# that is, the RDD of all pairs of elements (a, b) where a +# is in this and b is in other. +# +# @param x An RDD. +# @param other An RDD. +# @return A new RDD which is the Cartesian product of these two RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:2) +# sortByKey(cartesian(rdd, rdd)) +# # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2)) +#} +# @rdname cartesian +# @aliases cartesian,RDD,RDD-method setMethod("cartesian", signature(x = "RDD", other = "RDD"), function(x, other) { @@ -1532,24 +1532,24 @@ setMethod("cartesian", mergePartitions(rdd, FALSE) }) -#' Subtract an RDD with another RDD. -#' -#' Return an RDD with the elements from this that are not in other. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @param numPartitions Number of the partitions in the result RDD. -#' @return An RDD with the elements from this that are not in other. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4)) -#' rdd2 <- parallelize(sc, list(2, 4)) -#' collect(subtract(rdd1, rdd2)) -#' # list(1, 1, 3) -#'} -#' @rdname subtract -#' @aliases subtract,RDD +# Subtract an RDD with another RDD. +# +# Return an RDD with the elements from this that are not in other. +# +# @param x An RDD. +# @param other An RDD. +# @param numPartitions Number of the partitions in the result RDD. +# @return An RDD with the elements from this that are not in other. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4)) +# rdd2 <- parallelize(sc, list(2, 4)) +# collect(subtract(rdd1, rdd2)) +# # list(1, 1, 3) +#} +# @rdname subtract +# @aliases subtract,RDD setMethod("subtract", signature(x = "RDD", other = "RDD"), function(x, other, numPartitions = SparkR:::numPartitions(x)) { @@ -1559,28 +1559,28 @@ setMethod("subtract", keys(subtractByKey(rdd1, rdd2, numPartitions)) }) -#' Intersection of this RDD and another one. -#' -#' Return the intersection of this RDD and another one. -#' The output will not contain any duplicate elements, -#' even if the input RDDs did. Performs a hash partition -#' across the cluster. -#' Note that this method performs a shuffle internally. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @param numPartitions The number of partitions in the result RDD. -#' @return An RDD which is the intersection of these two RDDs. 
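A sketch of the two set-style binary operations side by side, reusing the inputs from the documented examples; note that subtract keeps duplicates from the left RDD while intersection removes them:

sc <- sparkR.init()
rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4))
rdd2 <- parallelize(sc, list(2, 4))
collect(subtract(rdd1, rdd2))                                 # list(1, 1, 3)
rddA <- parallelize(sc, list(1, 10, 2, 3, 4, 5))
rddB <- parallelize(sc, list(1, 6, 2, 3, 7, 8))
collect(sortBy(intersection(rddA, rddB), function(x) { x }))  # list(1, 2, 3)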
-#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5)) -#' rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8)) -#' collect(sortBy(intersection(rdd1, rdd2), function(x) { x })) -#' # list(1, 2, 3) -#'} -#' @rdname intersection -#' @aliases intersection,RDD +# Intersection of this RDD and another one. +# +# Return the intersection of this RDD and another one. +# The output will not contain any duplicate elements, +# even if the input RDDs did. Performs a hash partition +# across the cluster. +# Note that this method performs a shuffle internally. +# +# @param x An RDD. +# @param other An RDD. +# @param numPartitions The number of partitions in the result RDD. +# @return An RDD which is the intersection of these two RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5)) +# rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8)) +# collect(sortBy(intersection(rdd1, rdd2), function(x) { x })) +# # list(1, 2, 3) +#} +# @rdname intersection +# @aliases intersection,RDD setMethod("intersection", signature(x = "RDD", other = "RDD"), function(x, other, numPartitions = SparkR:::numPartitions(x)) { @@ -1596,26 +1596,26 @@ setMethod("intersection", keys(filterRDD(cogroup(rdd1, rdd2, numPartitions = numPartitions), filterFunction)) }) -#' Zips an RDD's partitions with one (or more) RDD(s). -#' Same as zipPartitions in Spark. -#' -#' @param ... RDDs to be zipped. -#' @param func A function to transform zipped partitions. -#' @return A new RDD by applying a function to the zipped partitions. -#' Assumes that all the RDDs have the *same number of partitions*, but -#' does *not* require them to have the same number of elements in each partition. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 -#' rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 -#' rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 -#' collect(zipPartitions(rdd1, rdd2, rdd3, -#' func = function(x, y, z) { list(list(x, y, z))} )) -#' # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6))) -#'} -#' @rdname zipRDD -#' @aliases zipPartitions,RDD +# Zips an RDD's partitions with one (or more) RDD(s). +# Same as zipPartitions in Spark. +# +# @param ... RDDs to be zipped. +# @param func A function to transform zipped partitions. +# @return A new RDD by applying a function to the zipped partitions. +# Assumes that all the RDDs have the *same number of partitions*, but +# does *not* require them to have the same number of elements in each partition. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 +# rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 +# rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 +# collect(zipPartitions(rdd1, rdd2, rdd3, +# func = function(x, y, z) { list(list(x, y, z))} )) +# # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6))) +#} +# @rdname zipRDD +# @aliases zipPartitions,RDD setMethod("zipPartitions", "RDD", function(..., func) { diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 4f05ba524a01a..cae06e6af2bff 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -150,21 +150,21 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) { dataFrame(sdf) } -#' toDF -#' -#' Converts an RDD to a DataFrame by infer the types. 
-#' -#' @param x An RDD -#' -#' @rdname DataFrame -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x))) -#' df <- toDF(rdd) -#' } +# toDF +# +# Converts an RDD to a DataFrame by infer the types. +# +# @param x An RDD +# +# @rdname DataFrame +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlCtx <- sparkRSQL.init(sc) +# rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x))) +# df <- toDF(rdd) +# } setGeneric("toDF", function(x, ...) { standardGeneric("toDF") }) @@ -207,23 +207,23 @@ jsonFile <- function(sqlCtx, path) { } -#' JSON RDD -#' -#' Loads an RDD storing one JSON object per string as a DataFrame. -#' -#' @param sqlCtx SQLContext to use -#' @param rdd An RDD of JSON string -#' @param schema A StructType object to use as schema -#' @param samplingRatio The ratio of simpling used to infer the schema -#' @return A DataFrame -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' rdd <- texFile(sc, "path/to/json") -#' df <- jsonRDD(sqlCtx, rdd) -#' } +# JSON RDD +# +# Loads an RDD storing one JSON object per string as a DataFrame. +# +# @param sqlCtx SQLContext to use +# @param rdd An RDD of JSON string +# @param schema A StructType object to use as schema +# @param samplingRatio The ratio of simpling used to infer the schema +# @return A DataFrame +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlCtx <- sparkRSQL.init(sc) +# rdd <- texFile(sc, "path/to/json") +# df <- jsonRDD(sqlCtx, rdd) +# } # TODO: support schema jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) { diff --git a/R/pkg/R/broadcast.R b/R/pkg/R/broadcast.R index 583fa2e7fdcfd..23dc38780716e 100644 --- a/R/pkg/R/broadcast.R +++ b/R/pkg/R/broadcast.R @@ -23,21 +23,21 @@ .broadcastValues <- new.env() .broadcastIdToName <- new.env() -#' @title S4 class that represents a Broadcast variable -#' @description Broadcast variables can be created using the broadcast -#' function from a \code{SparkContext}. -#' @rdname broadcast-class -#' @seealso broadcast -#' -#' @param id Id of the backing Spark broadcast variable -#' @export +# @title S4 class that represents a Broadcast variable +# @description Broadcast variables can be created using the broadcast +# function from a \code{SparkContext}. +# @rdname broadcast-class +# @seealso broadcast +# +# @param id Id of the backing Spark broadcast variable +# @export setClass("Broadcast", slots = list(id = "character")) -#' @rdname broadcast-class -#' @param value Value of the broadcast variable -#' @param jBroadcastRef reference to the backing Java broadcast object -#' @param objName name of broadcasted object -#' @export +# @rdname broadcast-class +# @param value Value of the broadcast variable +# @param jBroadcastRef reference to the backing Java broadcast object +# @param objName name of broadcasted object +# @export Broadcast <- function(id, value, jBroadcastRef, objName) { .broadcastValues[[id]] <- value .broadcastNames[[as.character(objName)]] <- jBroadcastRef @@ -45,13 +45,13 @@ Broadcast <- function(id, value, jBroadcastRef, objName) { new("Broadcast", id = id) } -#' @description -#' \code{value} can be used to get the value of a broadcast variable inside -#' a distributed function. 
-#' -#' @param bcast The broadcast variable to get -#' @rdname broadcast -#' @aliases value,Broadcast-method +# @description +# \code{value} can be used to get the value of a broadcast variable inside +# a distributed function. +# +# @param bcast The broadcast variable to get +# @rdname broadcast +# @aliases value,Broadcast-method setMethod("value", signature(bcast = "Broadcast"), function(bcast) { @@ -62,24 +62,24 @@ setMethod("value", } }) -#' Internal function to set values of a broadcast variable. -#' -#' This function is used internally by Spark to set the value of a broadcast -#' variable on workers. Not intended for use outside the package. -#' -#' @rdname broadcast-internal -#' @seealso broadcast, value +# Internal function to set values of a broadcast variable. +# +# This function is used internally by Spark to set the value of a broadcast +# variable on workers. Not intended for use outside the package. +# +# @rdname broadcast-internal +# @seealso broadcast, value -#' @param bcastId The id of broadcast variable to set -#' @param value The value to be set -#' @export +# @param bcastId The id of broadcast variable to set +# @param value The value to be set +# @export setBroadcastValue <- function(bcastId, value) { bcastIdStr <- as.character(bcastId) .broadcastValues[[bcastIdStr]] <- value } -#' Helper function to clear the list of broadcast variables we know about -#' Should be called when the SparkR JVM backend is shutdown +# Helper function to clear the list of broadcast variables we know about +# Should be called when the SparkR JVM backend is shutdown clearBroadcastVariables <- function() { bcasts <- ls(.broadcastNames) rm(list = bcasts, envir = .broadcastNames) diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index b4845b6948997..43be9c904fdf6 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -25,27 +25,27 @@ getMinPartitions <- function(sc, minPartitions) { as.integer(minPartitions) } -#' Create an RDD from a text file. -#' -#' This function reads a text file from HDFS, a local file system (available on all -#' nodes), or any Hadoop-supported file system URI, and creates an -#' RDD of strings from it. -#' -#' @param sc SparkContext to use -#' @param path Path of file to read. A vector of multiple paths is allowed. -#' @param minPartitions Minimum number of partitions to be created. If NULL, the default -#' value is chosen based on available parallelism. -#' @return RDD where each item is of type \code{character} -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' lines <- textFile(sc, "myfile.txt") -#'} +# Create an RDD from a text file. +# +# This function reads a text file from HDFS, a local file system (available on all +# nodes), or any Hadoop-supported file system URI, and creates an +# RDD of strings from it. +# +# @param sc SparkContext to use +# @param path Path of file to read. A vector of multiple paths is allowed. +# @param minPartitions Minimum number of partitions to be created. If NULL, the default +# value is chosen based on available parallelism. 
+# @return RDD where each item is of type \code{character} +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# lines <- textFile(sc, "myfile.txt") +#} textFile <- function(sc, path, minPartitions = NULL) { # Allow the user to have a more flexible definiton of the text file path path <- suppressWarnings(normalizePath(path)) - #' Convert a string vector of paths to a string containing comma separated paths + # Convert a string vector of paths to a string containing comma separated paths path <- paste(path, collapse = ",") jrdd <- callJMethod(sc, "textFile", path, getMinPartitions(sc, minPartitions)) @@ -53,27 +53,27 @@ textFile <- function(sc, path, minPartitions = NULL) { RDD(jrdd, "string") } -#' Load an RDD saved as a SequenceFile containing serialized objects. -#' -#' The file to be loaded should be one that was previously generated by calling -#' saveAsObjectFile() of the RDD class. -#' -#' @param sc SparkContext to use -#' @param path Path of file to read. A vector of multiple paths is allowed. -#' @param minPartitions Minimum number of partitions to be created. If NULL, the default -#' value is chosen based on available parallelism. -#' @return RDD containing serialized R objects. -#' @seealso saveAsObjectFile -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- objectFile(sc, "myfile") -#'} +# Load an RDD saved as a SequenceFile containing serialized objects. +# +# The file to be loaded should be one that was previously generated by calling +# saveAsObjectFile() of the RDD class. +# +# @param sc SparkContext to use +# @param path Path of file to read. A vector of multiple paths is allowed. +# @param minPartitions Minimum number of partitions to be created. If NULL, the default +# value is chosen based on available parallelism. +# @return RDD containing serialized R objects. +# @seealso saveAsObjectFile +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- objectFile(sc, "myfile") +#} objectFile <- function(sc, path, minPartitions = NULL) { # Allow the user to have a more flexible definiton of the text file path path <- suppressWarnings(normalizePath(path)) - #' Convert a string vector of paths to a string containing comma separated paths + # Convert a string vector of paths to a string containing comma separated paths path <- paste(path, collapse = ",") jrdd <- callJMethod(sc, "objectFile", path, getMinPartitions(sc, minPartitions)) @@ -81,24 +81,24 @@ objectFile <- function(sc, path, minPartitions = NULL) { RDD(jrdd, "byte") } -#' Create an RDD from a homogeneous list or vector. -#' -#' This function creates an RDD from a local homogeneous list in R. The elements -#' in the list are split into \code{numSlices} slices and distributed to nodes -#' in the cluster. -#' -#' @param sc SparkContext to use -#' @param coll collection to parallelize -#' @param numSlices number of partitions to create in the RDD -#' @return an RDD created from this collection -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2) -#' # The RDD should contain 10 elements -#' length(rdd) -#'} +# Create an RDD from a homogeneous list or vector. +# +# This function creates an RDD from a local homogeneous list in R. The elements +# in the list are split into \code{numSlices} slices and distributed to nodes +# in the cluster. 
+# +# @param sc SparkContext to use +# @param coll collection to parallelize +# @param numSlices number of partitions to create in the RDD +# @return an RDD created from this collection +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2) +# # The RDD should contain 10 elements +# length(rdd) +#} parallelize <- function(sc, coll, numSlices = 1) { # TODO: bound/safeguard numSlices # TODO: unit tests for if the split works for all primitives @@ -133,33 +133,33 @@ parallelize <- function(sc, coll, numSlices = 1) { RDD(jrdd, "byte") } -#' Include this specified package on all workers -#' -#' This function can be used to include a package on all workers before the -#' user's code is executed. This is useful in scenarios where other R package -#' functions are used in a function passed to functions like \code{lapply}. -#' NOTE: The package is assumed to be installed on every node in the Spark -#' cluster. -#' -#' @param sc SparkContext to use -#' @param pkg Package name -#' -#' @export -#' @examples -#'\dontrun{ -#' library(Matrix) -#' -#' sc <- sparkR.init() -#' # Include the matrix library we will be using -#' includePackage(sc, Matrix) -#' -#' generateSparse <- function(x) { -#' sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3)) -#' } -#' -#' rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse) -#' collect(rdd) -#'} +# Include this specified package on all workers +# +# This function can be used to include a package on all workers before the +# user's code is executed. This is useful in scenarios where other R package +# functions are used in a function passed to functions like \code{lapply}. +# NOTE: The package is assumed to be installed on every node in the Spark +# cluster. +# +# @param sc SparkContext to use +# @param pkg Package name +# +# @export +# @examples +#\dontrun{ +# library(Matrix) +# +# sc <- sparkR.init() +# # Include the matrix library we will be using +# includePackage(sc, Matrix) +# +# generateSparse <- function(x) { +# sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3)) +# } +# +# rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse) +# collect(rdd) +#} includePackage <- function(sc, pkg) { pkg <- as.character(substitute(pkg)) if (exists(".packages", .sparkREnv)) { @@ -171,30 +171,30 @@ includePackage <- function(sc, pkg) { .sparkREnv$.packages <- packages } -#' @title Broadcast a variable to all workers -#' -#' @description -#' Broadcast a read-only variable to the cluster, returning a \code{Broadcast} -#' object for reading it in distributed functions. -#' -#' @param sc Spark Context to use -#' @param object Object to be broadcast -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:2, 2L) -#' -#' # Large Matrix object that we want to broadcast -#' randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000)) -#' randomMatBr <- broadcast(sc, randomMat) -#' -#' # Use the broadcast variable inside the function -#' useBroadcast <- function(x) { -#' sum(value(randomMatBr) * x) -#' } -#' sumRDD <- lapply(rdd, useBroadcast) -#'} +# @title Broadcast a variable to all workers +# +# @description +# Broadcast a read-only variable to the cluster, returning a \code{Broadcast} +# object for reading it in distributed functions. 
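The broadcast pattern documented here ships a large read-only object to the workers once and reads it back with value() inside distributed functions; a sketch using an arbitrary 100x10 matrix for illustration:

sc <- sparkR.init()
rdd <- parallelize(sc, 1:2, 2L)
randomMat <- matrix(nrow = 100, ncol = 10, data = rnorm(1000))
randomMatBr <- broadcast(sc, randomMat)   # returns a Broadcast object
useBroadcast <- function(x) {
  sum(value(randomMatBr) * x)             # value() resolves the broadcast on the worker
}
sumRDD <- lapply(rdd, useBroadcast)
collect(sumRDD)                           # one partial sum per element of rdd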
+# +# @param sc Spark Context to use +# @param object Object to be broadcast +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:2, 2L) +# +# # Large Matrix object that we want to broadcast +# randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000)) +# randomMatBr <- broadcast(sc, randomMat) +# +# # Use the broadcast variable inside the function +# useBroadcast <- function(x) { +# sum(value(randomMatBr) * x) +# } +# sumRDD <- lapply(rdd, useBroadcast) +#} broadcast <- function(sc, object) { objName <- as.character(substitute(object)) serializedObj <- serialize(object, connection = NULL) @@ -205,21 +205,21 @@ broadcast <- function(sc, object) { Broadcast(id, object, jBroadcast, objName) } -#' @title Set the checkpoint directory -#' -#' Set the directory under which RDDs are going to be checkpointed. The -#' directory must be a HDFS path if running on a cluster. -#' -#' @param sc Spark Context to use -#' @param dirName Directory path -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' setCheckpointDir(sc, "~/checkpoint") -#' rdd <- parallelize(sc, 1:2, 2L) -#' checkpoint(rdd) -#'} +# @title Set the checkpoint directory +# +# Set the directory under which RDDs are going to be checkpointed. The +# directory must be a HDFS path if running on a cluster. +# +# @param sc Spark Context to use +# @param dirName Directory path +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# setCheckpointDir(sc, "~/checkpoint") +# rdd <- parallelize(sc, 1:2, 2L) +# checkpoint(rdd) +#} setCheckpointDir <- function(sc, dirName) { invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName)))) } diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 5838955f74dad..380e8ebe8c8f4 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -17,353 +17,353 @@ ############ RDD Actions and Transformations ############ -#' @rdname aggregateRDD -#' @seealso reduce -#' @export +# @rdname aggregateRDD +# @seealso reduce +# @export setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") }) -#' @rdname cache-methods -#' @export +# @rdname cache-methods +# @export setGeneric("cache", function(x) { standardGeneric("cache") }) -#' @rdname coalesce -#' @seealso repartition -#' @export +# @rdname coalesce +# @seealso repartition +# @export setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") }) -#' @rdname checkpoint-methods -#' @export +# @rdname checkpoint-methods +# @export setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") }) -#' @rdname collect-methods -#' @export +# @rdname collect-methods +# @export setGeneric("collect", function(x, ...) 
{ standardGeneric("collect") }) -#' @rdname collect-methods -#' @export +# @rdname collect-methods +# @export setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") }) -#' @rdname collect-methods -#' @export +# @rdname collect-methods +# @export setGeneric("collectPartition", function(x, partitionId) { standardGeneric("collectPartition") }) -#' @rdname count -#' @export +# @rdname count +# @export setGeneric("count", function(x) { standardGeneric("count") }) -#' @rdname countByValue -#' @export +# @rdname countByValue +# @export setGeneric("countByValue", function(x) { standardGeneric("countByValue") }) -#' @rdname distinct -#' @export +# @rdname distinct +# @export setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") }) -#' @rdname filterRDD -#' @export +# @rdname filterRDD +# @export setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") }) -#' @rdname first -#' @export +# @rdname first +# @export setGeneric("first", function(x) { standardGeneric("first") }) -#' @rdname flatMap -#' @export +# @rdname flatMap +# @export setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") }) -#' @rdname fold -#' @seealso reduce -#' @export +# @rdname fold +# @seealso reduce +# @export setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") }) -#' @rdname foreach -#' @export +# @rdname foreach +# @export setGeneric("foreach", function(x, func) { standardGeneric("foreach") }) -#' @rdname foreach -#' @export +# @rdname foreach +# @export setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") }) # The jrdd accessor function. setGeneric("getJRDD", function(rdd, ...) { standardGeneric("getJRDD") }) -#' @rdname glom -#' @export +# @rdname glom +# @export setGeneric("glom", function(x) { standardGeneric("glom") }) -#' @rdname keyBy -#' @export +# @rdname keyBy +# @export setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") }) -#' @rdname lapplyPartition -#' @export +# @rdname lapplyPartition +# @export setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") }) -#' @rdname lapplyPartitionsWithIndex -#' @export +# @rdname lapplyPartitionsWithIndex +# @export setGeneric("lapplyPartitionsWithIndex", function(X, FUN) { standardGeneric("lapplyPartitionsWithIndex") }) -#' @rdname lapply -#' @export +# @rdname lapply +# @export setGeneric("map", function(X, FUN) { standardGeneric("map") }) -#' @rdname lapplyPartition -#' @export +# @rdname lapplyPartition +# @export setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") }) -#' @rdname lapplyPartitionsWithIndex -#' @export +# @rdname lapplyPartitionsWithIndex +# @export setGeneric("mapPartitionsWithIndex", function(X, FUN) { standardGeneric("mapPartitionsWithIndex") }) -#' @rdname maximum -#' @export +# @rdname maximum +# @export setGeneric("maximum", function(x) { standardGeneric("maximum") }) -#' @rdname minimum -#' @export +# @rdname minimum +# @export setGeneric("minimum", function(x) { standardGeneric("minimum") }) -#' @rdname sumRDD -#' @export +# @rdname sumRDD +# @export setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") }) -#' @rdname name -#' @export +# @rdname name +# @export setGeneric("name", function(x) { standardGeneric("name") }) -#' @rdname numPartitions -#' @export +# @rdname numPartitions +# @export setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") }) -#' @rdname persist -#' @export +# @rdname persist +# @export 
setGeneric("persist", function(x, newLevel) { standardGeneric("persist") }) -#' @rdname pipeRDD -#' @export +# @rdname pipeRDD +# @export setGeneric("pipeRDD", function(x, command, env = list()) { standardGeneric("pipeRDD")}) -#' @rdname reduce -#' @export +# @rdname reduce +# @export setGeneric("reduce", function(x, func) { standardGeneric("reduce") }) -#' @rdname repartition -#' @seealso coalesce -#' @export +# @rdname repartition +# @seealso coalesce +# @export setGeneric("repartition", function(x, numPartitions) { standardGeneric("repartition") }) -#' @rdname sampleRDD -#' @export +# @rdname sampleRDD +# @export setGeneric("sampleRDD", function(x, withReplacement, fraction, seed) { standardGeneric("sampleRDD") }) -#' @rdname saveAsObjectFile -#' @seealso objectFile -#' @export +# @rdname saveAsObjectFile +# @seealso objectFile +# @export setGeneric("saveAsObjectFile", function(x, path) { standardGeneric("saveAsObjectFile") }) -#' @rdname saveAsTextFile -#' @export +# @rdname saveAsTextFile +# @export setGeneric("saveAsTextFile", function(x, path) { standardGeneric("saveAsTextFile") }) -#' @rdname setName -#' @export +# @rdname setName +# @export setGeneric("setName", function(x, name) { standardGeneric("setName") }) -#' @rdname sortBy -#' @export +# @rdname sortBy +# @export setGeneric("sortBy", function(x, func, ascending = TRUE, numPartitions = 1) { standardGeneric("sortBy") }) -#' @rdname take -#' @export +# @rdname take +# @export setGeneric("take", function(x, num) { standardGeneric("take") }) -#' @rdname takeOrdered -#' @export +# @rdname takeOrdered +# @export setGeneric("takeOrdered", function(x, num) { standardGeneric("takeOrdered") }) -#' @rdname takeSample -#' @export +# @rdname takeSample +# @export setGeneric("takeSample", function(x, withReplacement, num, seed) { standardGeneric("takeSample") }) -#' @rdname top -#' @export +# @rdname top +# @export setGeneric("top", function(x, num) { standardGeneric("top") }) -#' @rdname unionRDD -#' @export +# @rdname unionRDD +# @export setGeneric("unionRDD", function(x, y) { standardGeneric("unionRDD") }) -#' @rdname unpersist-methods -#' @export +# @rdname unpersist-methods +# @export setGeneric("unpersist", function(x, ...) 
{ standardGeneric("unpersist") }) -#' @rdname zipRDD -#' @export +# @rdname zipRDD +# @export setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") }) -#' @rdname zipRDD -#' @export +# @rdname zipRDD +# @export setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") }, signature = "...") -#' @rdname zipWithIndex -#' @seealso zipWithUniqueId -#' @export +# @rdname zipWithIndex +# @seealso zipWithUniqueId +# @export setGeneric("zipWithIndex", function(x) { standardGeneric("zipWithIndex") }) -#' @rdname zipWithUniqueId -#' @seealso zipWithIndex -#' @export +# @rdname zipWithUniqueId +# @seealso zipWithIndex +# @export setGeneric("zipWithUniqueId", function(x) { standardGeneric("zipWithUniqueId") }) ############ Binary Functions ############# -#' @rdname cartesian -#' @export +# @rdname cartesian +# @export setGeneric("cartesian", function(x, other) { standardGeneric("cartesian") }) -#' @rdname countByKey -#' @export +# @rdname countByKey +# @export setGeneric("countByKey", function(x) { standardGeneric("countByKey") }) -#' @rdname flatMapValues -#' @export +# @rdname flatMapValues +# @export setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") }) -#' @rdname intersection -#' @export +# @rdname intersection +# @export setGeneric("intersection", function(x, other, numPartitions = 1) { standardGeneric("intersection") }) -#' @rdname keys -#' @export +# @rdname keys +# @export setGeneric("keys", function(x) { standardGeneric("keys") }) -#' @rdname lookup -#' @export +# @rdname lookup +# @export setGeneric("lookup", function(x, key) { standardGeneric("lookup") }) -#' @rdname mapValues -#' @export +# @rdname mapValues +# @export setGeneric("mapValues", function(X, FUN) { standardGeneric("mapValues") }) -#' @rdname sampleByKey -#' @export +# @rdname sampleByKey +# @export setGeneric("sampleByKey", function(x, withReplacement, fractions, seed) { standardGeneric("sampleByKey") }) -#' @rdname values -#' @export +# @rdname values +# @export setGeneric("values", function(x) { standardGeneric("values") }) ############ Shuffle Functions ############ -#' @rdname aggregateByKey -#' @seealso foldByKey, combineByKey -#' @export +# @rdname aggregateByKey +# @seealso foldByKey, combineByKey +# @export setGeneric("aggregateByKey", function(x, zeroValue, seqOp, combOp, numPartitions) { standardGeneric("aggregateByKey") }) -#' @rdname cogroup -#' @export +# @rdname cogroup +# @export setGeneric("cogroup", function(..., numPartitions) { standardGeneric("cogroup") }, signature = "...") -#' @rdname combineByKey -#' @seealso groupByKey, reduceByKey -#' @export +# @rdname combineByKey +# @seealso groupByKey, reduceByKey +# @export setGeneric("combineByKey", function(x, createCombiner, mergeValue, mergeCombiners, numPartitions) { standardGeneric("combineByKey") }) -#' @rdname foldByKey -#' @seealso aggregateByKey, combineByKey -#' @export +# @rdname foldByKey +# @seealso aggregateByKey, combineByKey +# @export setGeneric("foldByKey", function(x, zeroValue, func, numPartitions) { standardGeneric("foldByKey") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("fullOuterJoin", function(x, y, numPartitions) { standardGeneric("fullOuterJoin") }) -#' @rdname groupByKey -#' @seealso reduceByKey -#' @export +# @rdname groupByKey +# @seealso reduceByKey +# @export setGeneric("groupByKey", function(x, numPartitions) { standardGeneric("groupByKey") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export 
setGeneric("join", function(x, y, ...) { standardGeneric("join") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("leftOuterJoin", function(x, y, numPartitions) { standardGeneric("leftOuterJoin") }) -#' @rdname partitionBy -#' @export +# @rdname partitionBy +# @export setGeneric("partitionBy", function(x, numPartitions, ...) { standardGeneric("partitionBy") }) -#' @rdname reduceByKey -#' @seealso groupByKey -#' @export +# @rdname reduceByKey +# @seealso groupByKey +# @export setGeneric("reduceByKey", function(x, combineFunc, numPartitions) { standardGeneric("reduceByKey")}) -#' @rdname reduceByKeyLocally -#' @seealso reduceByKey -#' @export +# @rdname reduceByKeyLocally +# @seealso reduceByKey +# @export setGeneric("reduceByKeyLocally", function(x, combineFunc) { standardGeneric("reduceByKeyLocally") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("rightOuterJoin", function(x, y, numPartitions) { standardGeneric("rightOuterJoin") }) -#' @rdname sortByKey -#' @export +# @rdname sortByKey +# @export setGeneric("sortByKey", function(x, ascending = TRUE, numPartitions = 1) { standardGeneric("sortByKey") }) -#' @rdname subtract -#' @export +# @rdname subtract +# @export setGeneric("subtract", function(x, other, numPartitions = 1) { standardGeneric("subtract") }) -#' @rdname subtractByKey -#' @export +# @rdname subtractByKey +# @export setGeneric("subtractByKey", function(x, other, numPartitions = 1) { standardGeneric("subtractByKey") @@ -372,8 +372,8 @@ setGeneric("subtractByKey", ################### Broadcast Variable Methods ################# -#' @rdname broadcast -#' @export +# @rdname broadcast +# @export setGeneric("value", function(bcast) { standardGeneric("value") }) @@ -477,8 +477,8 @@ setGeneric("showDF", function(x,...) { standardGeneric("showDF") }) #' @export setGeneric("sortDF", function(x, col, ...) { standardGeneric("sortDF") }) -#' @rdname tojson -#' @export +# @rdname tojson +# @export setGeneric("toJSON", function(x) { standardGeneric("toJSON") }) #' @rdname DataFrame diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index edeb8d9f75f7b..7694652856da5 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -21,23 +21,23 @@ NULL ############ Actions and Transformations ############ -#' Look up elements of a key in an RDD -#' -#' @description -#' \code{lookup} returns a list of values in this RDD for key key. -#' -#' @param x The RDD to collect -#' @param key The key to look up for -#' @return a list of values in this RDD for key key -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(c(1, 1), c(2, 2), c(1, 3)) -#' rdd <- parallelize(sc, pairs) -#' lookup(rdd, 1) # list(1, 3) -#'} -#' @rdname lookup -#' @aliases lookup,RDD-method +# Look up elements of a key in an RDD +# +# @description +# \code{lookup} returns a list of values in this RDD for key key. +# +# @param x The RDD to collect +# @param key The key to look up for +# @return a list of values in this RDD for key key +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(c(1, 1), c(2, 2), c(1, 3)) +# rdd <- parallelize(sc, pairs) +# lookup(rdd, 1) # list(1, 3) +#} +# @rdname lookup +# @aliases lookup,RDD-method setMethod("lookup", signature(x = "RDD", key = "ANY"), function(x, key) { @@ -49,21 +49,21 @@ setMethod("lookup", collect(valsRDD) }) -#' Count the number of elements for each key, and return the result to the -#' master as lists of (key, count) pairs. -#' -#' Same as countByKey in Spark. 
-#' -#' @param x The RDD to count keys. -#' @return list of (key, count) pairs, where count is number of each key in rdd. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(c("a", 1), c("b", 1), c("a", 1))) -#' countByKey(rdd) # ("a", 2L), ("b", 1L) -#'} -#' @rdname countByKey -#' @aliases countByKey,RDD-method +# Count the number of elements for each key, and return the result to the +# master as lists of (key, count) pairs. +# +# Same as countByKey in Spark. +# +# @param x The RDD to count keys. +# @return list of (key, count) pairs, where count is number of each key in rdd. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(c("a", 1), c("b", 1), c("a", 1))) +# countByKey(rdd) # ("a", 2L), ("b", 1L) +#} +# @rdname countByKey +# @aliases countByKey,RDD-method setMethod("countByKey", signature(x = "RDD"), function(x) { @@ -71,17 +71,17 @@ setMethod("countByKey", countByValue(keys) }) -#' Return an RDD with the keys of each tuple. -#' -#' @param x The RDD from which the keys of each tuple is returned. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) -#' collect(keys(rdd)) # list(1, 3) -#'} -#' @rdname keys -#' @aliases keys,RDD +# Return an RDD with the keys of each tuple. +# +# @param x The RDD from which the keys of each tuple is returned. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) +# collect(keys(rdd)) # list(1, 3) +#} +# @rdname keys +# @aliases keys,RDD setMethod("keys", signature(x = "RDD"), function(x) { @@ -91,17 +91,17 @@ setMethod("keys", lapply(x, func) }) -#' Return an RDD with the values of each tuple. -#' -#' @param x The RDD from which the values of each tuple is returned. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) -#' collect(values(rdd)) # list(2, 4) -#'} -#' @rdname values -#' @aliases values,RDD +# Return an RDD with the values of each tuple. +# +# @param x The RDD from which the values of each tuple is returned. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) +# collect(values(rdd)) # list(2, 4) +#} +# @rdname values +# @aliases values,RDD setMethod("values", signature(x = "RDD"), function(x) { @@ -111,23 +111,23 @@ setMethod("values", lapply(x, func) }) -#' Applies a function to all values of the elements, without modifying the keys. -#' -#' The same as `mapValues()' in Spark. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on the value of each element. -#' @return a new RDD created by the transformation. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' makePairs <- lapply(rdd, function(x) { list(x, x) }) -#' collect(mapValues(makePairs, function(x) { x * 2) }) -#' Output: list(list(1,2), list(2,4), list(3,6), ...) -#'} -#' @rdname mapValues -#' @aliases mapValues,RDD,function-method +# Applies a function to all values of the elements, without modifying the keys. +# +# The same as `mapValues()' in Spark. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on the value of each element. +# @return a new RDD created by the transformation. 
+# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# makePairs <- lapply(rdd, function(x) { list(x, x) }) +# collect(mapValues(makePairs, function(x) { x * 2) }) +# Output: list(list(1,2), list(2,4), list(3,6), ...) +#} +# @rdname mapValues +# @aliases mapValues,RDD,function-method setMethod("mapValues", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -137,23 +137,23 @@ setMethod("mapValues", lapply(X, func) }) -#' Pass each value in the key-value pair RDD through a flatMap function without -#' changing the keys; this also retains the original RDD's partitioning. -#' -#' The same as 'flatMapValues()' in Spark. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on the value of each element. -#' @return a new RDD created by the transformation. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, c(1,2)), list(2, c(3,4)))) -#' collect(flatMapValues(rdd, function(x) { x })) -#' Output: list(list(1,1), list(1,2), list(2,3), list(2,4)) -#'} -#' @rdname flatMapValues -#' @aliases flatMapValues,RDD,function-method +# Pass each value in the key-value pair RDD through a flatMap function without +# changing the keys; this also retains the original RDD's partitioning. +# +# The same as 'flatMapValues()' in Spark. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on the value of each element. +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, c(1,2)), list(2, c(3,4)))) +# collect(flatMapValues(rdd, function(x) { x })) +# Output: list(list(1,1), list(1,2), list(2,3), list(2,4)) +#} +# @rdname flatMapValues +# @aliases flatMapValues,RDD,function-method setMethod("flatMapValues", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -165,30 +165,30 @@ setMethod("flatMapValues", ############ Shuffle Functions ############ -#' Partition an RDD by key -#' -#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V). -#' For each element of this RDD, the partitioner is used to compute a hash -#' function and the RDD is partitioned using this hash value. -#' -#' @param x The RDD to partition. Should be an RDD where each element is -#' list(K, V) or c(K, V). -#' @param numPartitions Number of partitions to create. -#' @param ... Other optional arguments to partitionBy. -#' -#' @param partitionFunc The partition function to use. Uses a default hashCode -#' function if not provided -#' @return An RDD partitioned using the specified partitioner. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' parts <- partitionBy(rdd, 2L) -#' collectPartition(parts, 0L) # First partition should contain list(1, 2) and list(1, 4) -#'} -#' @rdname partitionBy -#' @aliases partitionBy,RDD,integer-method +# Partition an RDD by key +# +# This function operates on RDDs where every element is of the form list(K, V) or c(K, V). +# For each element of this RDD, the partitioner is used to compute a hash +# function and the RDD is partitioned using this hash value. +# +# @param x The RDD to partition. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param numPartitions Number of partitions to create. +# @param ... Other optional arguments to partitionBy. +# +# @param partitionFunc The partition function to use. 
Uses a default hashCode +# function if not provided +# @return An RDD partitioned using the specified partitioner. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# parts <- partitionBy(rdd, 2L) +# collectPartition(parts, 0L) # First partition should contain list(1, 2) and list(1, 4) +#} +# @rdname partitionBy +# @aliases partitionBy,RDD,integer-method setMethod("partitionBy", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions, partitionFunc = hashCode) { @@ -234,27 +234,27 @@ setMethod("partitionBy", RDD(r, serializedMode = "byte") }) -#' Group values by key -#' -#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V). -#' and group values for each key in the RDD into a single sequence. -#' -#' @param x The RDD to group. Should be an RDD where each element is -#' list(K, V) or c(K, V). -#' @param numPartitions Number of partitions to create. -#' @return An RDD where each element is list(K, list(V)) -#' @seealso reduceByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' parts <- groupByKey(rdd, 2L) -#' grouped <- collect(parts) -#' grouped[[1]] # Should be a list(1, list(2, 4)) -#'} -#' @rdname groupByKey -#' @aliases groupByKey,RDD,integer-method +# Group values by key +# +# This function operates on RDDs where every element is of the form list(K, V) or c(K, V). +# and group values for each key in the RDD into a single sequence. +# +# @param x The RDD to group. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param numPartitions Number of partitions to create. +# @return An RDD where each element is list(K, list(V)) +# @seealso reduceByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# parts <- groupByKey(rdd, 2L) +# grouped <- collect(parts) +# grouped[[1]] # Should be a list(1, list(2, 4)) +#} +# @rdname groupByKey +# @aliases groupByKey,RDD,integer-method setMethod("groupByKey", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions) { @@ -292,28 +292,28 @@ setMethod("groupByKey", lapplyPartition(shuffled, groupVals) }) -#' Merge values by key -#' -#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V). -#' and merges the values for each key using an associative reduce function. -#' -#' @param x The RDD to reduce by key. Should be an RDD where each element is -#' list(K, V) or c(K, V). -#' @param combineFunc The associative reduce function to use. -#' @param numPartitions Number of partitions to create. -#' @return An RDD where each element is list(K, V') where V' is the merged -#' value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' parts <- reduceByKey(rdd, "+", 2L) -#' reduced <- collect(parts) -#' reduced[[1]] # Should be a list(1, 6) -#'} -#' @rdname reduceByKey -#' @aliases reduceByKey,RDD,integer-method +# Merge values by key +# +# This function operates on RDDs where every element is of the form list(K, V) or c(K, V). +# and merges the values for each key using an associative reduce function. +# +# @param x The RDD to reduce by key. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param combineFunc The associative reduce function to use. 
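# A side-by-side sketch of groupByKey() and reduceByKey() on the same input,
# mirroring the documented examples; result ordering may vary.
sc <- sparkR.init()
rdd <- parallelize(sc, list(list(1, 2), list(1.1, 3), list(1, 4)))
collect(groupByKey(rdd, 2L))[[1]]        # expected (subject to ordering): list(1, list(2, 4))
collect(reduceByKey(rdd, "+", 2L))[[1]]  # expected (subject to ordering): list(1, 6)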
+# @param numPartitions Number of partitions to create. +# @return An RDD where each element is list(K, V') where V' is the merged +# value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# parts <- reduceByKey(rdd, "+", 2L) +# reduced <- collect(parts) +# reduced[[1]] # Should be a list(1, 6) +#} +# @rdname reduceByKey +# @aliases reduceByKey,RDD,integer-method setMethod("reduceByKey", signature(x = "RDD", combineFunc = "ANY", numPartitions = "numeric"), function(x, combineFunc, numPartitions) { @@ -333,27 +333,27 @@ setMethod("reduceByKey", lapplyPartition(shuffled, reduceVals) }) -#' Merge values by key locally -#' -#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V). -#' and merges the values for each key using an associative reduce function, but return the -#' results immediately to the driver as an R list. -#' -#' @param x The RDD to reduce by key. Should be an RDD where each element is -#' list(K, V) or c(K, V). -#' @param combineFunc The associative reduce function to use. -#' @return A list of elements of type list(K, V') where V' is the merged value for each key -#' @seealso reduceByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' reduced <- reduceByKeyLocally(rdd, "+") -#' reduced # list(list(1, 6), list(1.1, 3)) -#'} -#' @rdname reduceByKeyLocally -#' @aliases reduceByKeyLocally,RDD,integer-method +# Merge values by key locally +# +# This function operates on RDDs where every element is of the form list(K, V) or c(K, V). +# and merges the values for each key using an associative reduce function, but return the +# results immediately to the driver as an R list. +# +# @param x The RDD to reduce by key. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param combineFunc The associative reduce function to use. +# @return A list of elements of type list(K, V') where V' is the merged value for each key +# @seealso reduceByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# reduced <- reduceByKeyLocally(rdd, "+") +# reduced # list(list(1, 6), list(1.1, 3)) +#} +# @rdname reduceByKeyLocally +# @aliases reduceByKeyLocally,RDD,integer-method setMethod("reduceByKeyLocally", signature(x = "RDD", combineFunc = "ANY"), function(x, combineFunc) { @@ -385,41 +385,41 @@ setMethod("reduceByKeyLocally", convertEnvsToList(merged[[1]], merged[[2]]) }) -#' Combine values by key -#' -#' Generic function to combine the elements for each key using a custom set of -#' aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], -#' for a "combined type" C. Note that V and C can be different -- for example, one -#' might group an RDD of type (Int, Int) into an RDD of type (Int, Seq[Int]). - -#' Users provide three functions: -#' \itemize{ -#' \item createCombiner, which turns a V into a C (e.g., creates a one-element list) -#' \item mergeValue, to merge a V into a C (e.g., adds it to the end of a list) - -#' \item mergeCombiners, to combine two C's into a single one (e.g., concatentates -#' two lists). -#' } -#' -#' @param x The RDD to combine. Should be an RDD where each element is -#' list(K, V) or c(K, V). 
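# reduceByKeyLocally() returns the merged pairs straight to the driver as an R
# list, so no collect() is needed; a hedged sketch with character keys:
sc <- sparkR.init()
rdd <- parallelize(sc, list(list("a", 1), list("b", 1), list("a", 2)))
reduceByKeyLocally(rdd, "+")  # expected: pairs roughly like list("a", 3), list("b", 1)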
-#' @param createCombiner Create a combiner (C) given a value (V) -#' @param mergeValue Merge the given value (V) with an existing combiner (C) -#' @param mergeCombiners Merge two combiners and return a new combiner -#' @param numPartitions Number of partitions to create. -#' @return An RDD where each element is list(K, C) where C is the combined type -#' -#' @seealso groupByKey, reduceByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' parts <- combineByKey(rdd, function(x) { x }, "+", "+", 2L) -#' combined <- collect(parts) -#' combined[[1]] # Should be a list(1, 6) -#'} -#' @rdname combineByKey -#' @aliases combineByKey,RDD,ANY,ANY,ANY,integer-method +# Combine values by key +# +# Generic function to combine the elements for each key using a custom set of +# aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], +# for a "combined type" C. Note that V and C can be different -- for example, one +# might group an RDD of type (Int, Int) into an RDD of type (Int, Seq[Int]). + +# Users provide three functions: +# \itemize{ +# \item createCombiner, which turns a V into a C (e.g., creates a one-element list) +# \item mergeValue, to merge a V into a C (e.g., adds it to the end of a list) - +# \item mergeCombiners, to combine two C's into a single one (e.g., concatentates +# two lists). +# } +# +# @param x The RDD to combine. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param createCombiner Create a combiner (C) given a value (V) +# @param mergeValue Merge the given value (V) with an existing combiner (C) +# @param mergeCombiners Merge two combiners and return a new combiner +# @param numPartitions Number of partitions to create. +# @return An RDD where each element is list(K, C) where C is the combined type +# +# @seealso groupByKey, reduceByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# parts <- combineByKey(rdd, function(x) { x }, "+", "+", 2L) +# combined <- collect(parts) +# combined[[1]] # Should be a list(1, 6) +#} +# @rdname combineByKey +# @aliases combineByKey,RDD,ANY,ANY,ANY,integer-method setMethod("combineByKey", signature(x = "RDD", createCombiner = "ANY", mergeValue = "ANY", mergeCombiners = "ANY", numPartitions = "numeric"), @@ -451,36 +451,36 @@ setMethod("combineByKey", lapplyPartition(shuffled, mergeAfterShuffle) }) -#' Aggregate a pair RDD by each key. -#' -#' Aggregate the values of each key in an RDD, using given combine functions -#' and a neutral "zero value". This function can return a different result type, -#' U, than the type of the values in this RDD, V. Thus, we need one operation -#' for merging a V into a U and one operation for merging two U's, The former -#' operation is used for merging values within a partition, and the latter is -#' used for merging values between partitions. To avoid memory allocation, both -#' of these functions are allowed to modify and return their first argument -#' instead of creating a new U. -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param seqOp A function to aggregate the values of each key. It may return -#' a different result type from the type of the values. -#' @param combOp A function to aggregate results of seqOp. -#' @return An RDD containing the aggregation result. 
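# With an identity createCombiner and "+" for both merge functions,
# combineByKey() produces the same per-key sums as reduceByKey(); a hedged
# sketch on the input used in the documented examples:
sc <- sparkR.init()
rdd <- parallelize(sc, list(list(1, 2), list(1.1, 3), list(1, 4)))
combined <- collect(combineByKey(rdd, function(v) { v }, "+", "+", 2L))
combined[[1]]  # expected: list(1, 6), matching reduceByKey(rdd, "+", 2L)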
-#' @seealso foldByKey, combineByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) -#' zeroValue <- list(0, 0) -#' seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } -#' combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } -#' aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L) -#' # list(list(1, list(3, 2)), list(2, list(7, 2))) -#'} -#' @rdname aggregateByKey -#' @aliases aggregateByKey,RDD,ANY,ANY,ANY,integer-method +# Aggregate a pair RDD by each key. +# +# Aggregate the values of each key in an RDD, using given combine functions +# and a neutral "zero value". This function can return a different result type, +# U, than the type of the values in this RDD, V. Thus, we need one operation +# for merging a V into a U and one operation for merging two U's, The former +# operation is used for merging values within a partition, and the latter is +# used for merging values between partitions. To avoid memory allocation, both +# of these functions are allowed to modify and return their first argument +# instead of creating a new U. +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param seqOp A function to aggregate the values of each key. It may return +# a different result type from the type of the values. +# @param combOp A function to aggregate results of seqOp. +# @return An RDD containing the aggregation result. +# @seealso foldByKey, combineByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) +# zeroValue <- list(0, 0) +# seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } +# combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } +# aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L) +# # list(list(1, list(3, 2)), list(2, list(7, 2))) +#} +# @rdname aggregateByKey +# @aliases aggregateByKey,RDD,ANY,ANY,ANY,integer-method setMethod("aggregateByKey", signature(x = "RDD", zeroValue = "ANY", seqOp = "ANY", combOp = "ANY", numPartitions = "numeric"), @@ -492,26 +492,26 @@ setMethod("aggregateByKey", combineByKey(x, createCombiner, seqOp, combOp, numPartitions) }) -#' Fold a pair RDD by each key. -#' -#' Aggregate the values of each key in an RDD, using an associative function "func" -#' and a neutral "zero value" which may be added to the result an arbitrary -#' number of times, and must not change the result (e.g., 0 for addition, or -#' 1 for multiplication.). -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param func An associative function for folding values of each key. -#' @return An RDD containing the aggregation result. -#' @seealso aggregateByKey, combineByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) -#' foldByKey(rdd, 0, "+", 2L) # list(list(1, 3), list(2, 7)) -#'} -#' @rdname foldByKey -#' @aliases foldByKey,RDD,ANY,ANY,integer-method +# Fold a pair RDD by each key. +# +# Aggregate the values of each key in an RDD, using an associative function "func" +# and a neutral "zero value" which may be added to the result an arbitrary +# number of times, and must not change the result (e.g., 0 for addition, or +# 1 for multiplication.). +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param func An associative function for folding values of each key. +# @return An RDD containing the aggregation result. 
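# Building on the documented sum-and-count pattern, a per-key mean can be
# derived by following aggregateByKey() with mapValues(); a hedged sketch:
sc <- sparkR.init()
rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4)))
sumCount <- aggregateByKey(rdd, list(0, 0),
                           function(acc, v) { list(acc[[1]] + v, acc[[2]] + 1) },
                           function(a, b) { list(a[[1]] + b[[1]], a[[2]] + b[[2]]) },
                           2L)
collect(mapValues(sumCount, function(p) { p[[1]] / p[[2]] }))
# expected: list(list(1, 1.5), list(2, 3.5)), ordering may vary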
+# @seealso aggregateByKey, combineByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) +# foldByKey(rdd, 0, "+", 2L) # list(list(1, 3), list(2, 7)) +#} +# @rdname foldByKey +# @aliases foldByKey,RDD,ANY,ANY,integer-method setMethod("foldByKey", signature(x = "RDD", zeroValue = "ANY", func = "ANY", numPartitions = "numeric"), @@ -521,28 +521,28 @@ setMethod("foldByKey", ############ Binary Functions ############# -#' Join two RDDs -#' -#' @description -#' \code{join} This function joins two RDDs where every element is of the form list(K, V). -#' The key types of the two RDDs should be the same. -#' -#' @param x An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param y An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param numPartitions Number of partitions to create. -#' @return a new RDD containing all pairs of elements with matching keys in -#' two input RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) -#' join(rdd1, rdd2, 2L) # list(list(1, list(1, 2)), list(1, list(1, 3)) -#'} -#' @rdname join-methods -#' @aliases join,RDD,RDD-method +# Join two RDDs +# +# @description +# \code{join} This function joins two RDDs where every element is of the form list(K, V). +# The key types of the two RDDs should be the same. +# +# @param x An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param y An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param numPartitions Number of partitions to create. +# @return a new RDD containing all pairs of elements with matching keys in +# two input RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) +# join(rdd1, rdd2, 2L) # list(list(1, list(1, 2)), list(1, list(1, 3)) +#} +# @rdname join-methods +# @aliases join,RDD,RDD-method setMethod("join", signature(x = "RDD", y = "RDD"), function(x, y, numPartitions) { @@ -557,30 +557,30 @@ setMethod("join", doJoin) }) -#' Left outer join two RDDs -#' -#' @description -#' \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V). -#' The key types of the two RDDs should be the same. -#' -#' @param x An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param y An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param numPartitions Number of partitions to create. -#' @return For each element (k, v) in x, the resulting RDD will either contain -#' all pairs (k, (v, w)) for (k, w) in rdd2, or the pair (k, (v, NULL)) -#' if no elements in rdd2 have key k. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) -#' leftOuterJoin(rdd1, rdd2, 2L) -#' # list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL))) -#'} -#' @rdname join-methods -#' @aliases leftOuterJoin,RDD,RDD-method +# Left outer join two RDDs +# +# @description +# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V). +# The key types of the two RDDs should be the same. +# +# @param x An RDD to be joined. 
Should be an RDD where each element is +# list(K, V). +# @param y An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param numPartitions Number of partitions to create. +# @return For each element (k, v) in x, the resulting RDD will either contain +# all pairs (k, (v, w)) for (k, w) in rdd2, or the pair (k, (v, NULL)) +# if no elements in rdd2 have key k. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) +# leftOuterJoin(rdd1, rdd2, 2L) +# # list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL))) +#} +# @rdname join-methods +# @aliases leftOuterJoin,RDD,RDD-method setMethod("leftOuterJoin", signature(x = "RDD", y = "RDD", numPartitions = "numeric"), function(x, y, numPartitions) { @@ -594,30 +594,30 @@ setMethod("leftOuterJoin", joined <- flatMapValues(groupByKey(unionRDD(xTagged, yTagged), numPartitions), doJoin) }) -#' Right outer join two RDDs -#' -#' @description -#' \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V). -#' The key types of the two RDDs should be the same. -#' -#' @param x An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param y An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param numPartitions Number of partitions to create. -#' @return For each element (k, w) in y, the resulting RDD will either contain -#' all pairs (k, (v, w)) for (k, v) in x, or the pair (k, (NULL, w)) -#' if no elements in x have key k. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3))) -#' rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' rightOuterJoin(rdd1, rdd2, 2L) -#' # list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4))) -#'} -#' @rdname join-methods -#' @aliases rightOuterJoin,RDD,RDD-method +# Right outer join two RDDs +# +# @description +# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V). +# The key types of the two RDDs should be the same. +# +# @param x An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param y An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param numPartitions Number of partitions to create. +# @return For each element (k, w) in y, the resulting RDD will either contain +# all pairs (k, (v, w)) for (k, v) in x, or the pair (k, (NULL, w)) +# if no elements in x have key k. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3))) +# rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# rightOuterJoin(rdd1, rdd2, 2L) +# # list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4))) +#} +# @rdname join-methods +# @aliases rightOuterJoin,RDD,RDD-method setMethod("rightOuterJoin", signature(x = "RDD", y = "RDD", numPartitions = "numeric"), function(x, y, numPartitions) { @@ -631,33 +631,33 @@ setMethod("rightOuterJoin", joined <- flatMapValues(groupByKey(unionRDD(xTagged, yTagged), numPartitions), doJoin) }) -#' Full outer join two RDDs -#' -#' @description -#' \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V). -#' The key types of the two RDDs should be the same. -#' -#' @param x An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param y An RDD to be joined. 
Should be an RDD where each element is -#' list(K, V). -#' @param numPartitions Number of partitions to create. -#' @return For each element (k, v) in x and (k, w) in y, the resulting RDD -#' will contain all pairs (k, (v, w)) for both (k, v) in x and -#' (k, w) in y, or the pair (k, (NULL, w))/(k, (v, NULL)) if no elements -#' in x/y have key k. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3), list(3, 3))) -#' rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' fullOuterJoin(rdd1, rdd2, 2L) # list(list(1, list(2, 1)), -#' # list(1, list(3, 1)), -#' # list(2, list(NULL, 4))) -#' # list(3, list(3, NULL)), -#'} -#' @rdname join-methods -#' @aliases fullOuterJoin,RDD,RDD-method +# Full outer join two RDDs +# +# @description +# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V). +# The key types of the two RDDs should be the same. +# +# @param x An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param y An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param numPartitions Number of partitions to create. +# @return For each element (k, v) in x and (k, w) in y, the resulting RDD +# will contain all pairs (k, (v, w)) for both (k, v) in x and +# (k, w) in y, or the pair (k, (NULL, w))/(k, (v, NULL)) if no elements +# in x/y have key k. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3), list(3, 3))) +# rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# fullOuterJoin(rdd1, rdd2, 2L) # list(list(1, list(2, 1)), +# # list(1, list(3, 1)), +# # list(2, list(NULL, 4))) +# # list(3, list(3, NULL)), +#} +# @rdname join-methods +# @aliases fullOuterJoin,RDD,RDD-method setMethod("fullOuterJoin", signature(x = "RDD", y = "RDD", numPartitions = "numeric"), function(x, y, numPartitions) { @@ -671,23 +671,23 @@ setMethod("fullOuterJoin", joined <- flatMapValues(groupByKey(unionRDD(xTagged, yTagged), numPartitions), doJoin) }) -#' For each key k in several RDDs, return a resulting RDD that -#' whose values are a list of values for the key in all RDDs. -#' -#' @param ... Several RDDs. -#' @param numPartitions Number of partitions to create. -#' @return a new RDD containing all pairs of elements with values in a list -#' in all RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) -#' cogroup(rdd1, rdd2, numPartitions = 2L) -#' # list(list(1, list(1, list(2, 3))), list(2, list(list(4), list())) -#'} -#' @rdname cogroup -#' @aliases cogroup,RDD-method +# For each key k in several RDDs, return a resulting RDD that +# whose values are a list of values for the key in all RDDs. +# +# @param ... Several RDDs. +# @param numPartitions Number of partitions to create. +# @return a new RDD containing all pairs of elements with values in a list +# in all RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) +# cogroup(rdd1, rdd2, numPartitions = 2L) +# # list(list(1, list(1, list(2, 3))), list(2, list(list(4), list())) +#} +# @rdname cogroup +# @aliases cogroup,RDD-method setMethod("cogroup", "RDD", function(..., numPartitions) { @@ -723,20 +723,20 @@ setMethod("cogroup", group.func) }) -#' Sort a (k, v) pair RDD by k. 
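# A compact comparison of the outer joins on one pair of RDDs, following the
# @return descriptions above; pair ordering in the results may vary.
sc <- sparkR.init()
rdd1 <- parallelize(sc, list(list(1, "x"), list(2, "y")))
rdd2 <- parallelize(sc, list(list(1, "a"), list(3, "b")))
collect(leftOuterJoin(rdd1, rdd2, 2L))
# expected: list(list(1, list("x", "a")), list(2, list("y", NULL)))
collect(rightOuterJoin(rdd1, rdd2, 2L))
# expected: list(list(1, list("x", "a")), list(3, list(NULL, "b")))
collect(fullOuterJoin(rdd1, rdd2, 2L))
# expected: the union of the two results above, with each unmatched side as NULL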
-#' -#' @param x A (k, v) pair RDD to be sorted. -#' @param ascending A flag to indicate whether the sorting is ascending or descending. -#' @param numPartitions Number of partitions to create. -#' @return An RDD where all (k, v) pair elements are sorted. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(3, 1), list(2, 2), list(1, 3))) -#' collect(sortByKey(rdd)) # list (list(1, 3), list(2, 2), list(3, 1)) -#'} -#' @rdname sortByKey -#' @aliases sortByKey,RDD,RDD-method +# Sort a (k, v) pair RDD by k. +# +# @param x A (k, v) pair RDD to be sorted. +# @param ascending A flag to indicate whether the sorting is ascending or descending. +# @param numPartitions Number of partitions to create. +# @return An RDD where all (k, v) pair elements are sorted. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(3, 1), list(2, 2), list(1, 3))) +# collect(sortByKey(rdd)) # list (list(1, 3), list(2, 2), list(3, 1)) +#} +# @rdname sortByKey +# @aliases sortByKey,RDD,RDD-method setMethod("sortByKey", signature(x = "RDD"), function(x, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) { @@ -785,25 +785,25 @@ setMethod("sortByKey", lapplyPartition(newRDD, partitionFunc) }) -#' Subtract a pair RDD with another pair RDD. -#' -#' Return an RDD with the pairs from x whose keys are not in other. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @param numPartitions Number of the partitions in the result RDD. -#' @return An RDD with the pairs from x whose keys are not in other. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4), -#' list("b", 5), list("a", 2))) -#' rdd2 <- parallelize(sc, list(list("a", 3), list("c", 1))) -#' collect(subtractByKey(rdd1, rdd2)) -#' # list(list("b", 4), list("b", 5)) -#'} -#' @rdname subtractByKey -#' @aliases subtractByKey,RDD +# Subtract a pair RDD with another pair RDD. +# +# Return an RDD with the pairs from x whose keys are not in other. +# +# @param x An RDD. +# @param other An RDD. +# @param numPartitions Number of the partitions in the result RDD. +# @return An RDD with the pairs from x whose keys are not in other. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4), +# list("b", 5), list("a", 2))) +# rdd2 <- parallelize(sc, list(list("a", 3), list("c", 1))) +# collect(subtractByKey(rdd1, rdd2)) +# # list(list("b", 4), list("b", 5)) +#} +# @rdname subtractByKey +# @aliases subtractByKey,RDD setMethod("subtractByKey", signature(x = "RDD", other = "RDD"), function(x, other, numPartitions = SparkR:::numPartitions(x)) { @@ -819,41 +819,41 @@ setMethod("subtractByKey", function (v) { v[[1]] }) }) -#' Return a subset of this RDD sampled by key. -#' -#' @description -#' \code{sampleByKey} Create a sample of this RDD using variable sampling rates -#' for different keys as specified by fractions, a key to sampling rate map. -#' -#' @param x The RDD to sample elements by key, where each element is -#' list(K, V) or c(K, V). 
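# sortByKey() also accepts ascending = FALSE; a hedged sketch combining it with
# subtractByKey() from above:
sc <- sparkR.init()
rdd1 <- parallelize(sc, list(list(3, 1), list(1, 3), list(2, 2)))
collect(sortByKey(rdd1, ascending = FALSE))
# expected: list(list(3, 1), list(2, 2), list(1, 3))
rdd2 <- parallelize(sc, list(list(2, 0)))
collect(subtractByKey(rdd1, rdd2))  # expected: list(list(3, 1), list(1, 3)), order may vary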
-#' @param withReplacement Sampling with replacement or not -#' @param fraction The (rough) sample target fraction -#' @param seed Randomness seed value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3000) -#' pairs <- lapply(rdd, function(x) { if (x %% 3 == 0) list("a", x) -#' else { if (x %% 3 == 1) list("b", x) else list("c", x) }}) -#' fractions <- list(a = 0.2, b = 0.1, c = 0.3) -#' sample <- sampleByKey(pairs, FALSE, fractions, 1618L) -#' 100 < length(lookup(sample, "a")) && 300 > length(lookup(sample, "a")) # TRUE -#' 50 < length(lookup(sample, "b")) && 150 > length(lookup(sample, "b")) # TRUE -#' 200 < length(lookup(sample, "c")) && 400 > length(lookup(sample, "c")) # TRUE -#' lookup(sample, "a")[which.min(lookup(sample, "a"))] >= 0 # TRUE -#' lookup(sample, "a")[which.max(lookup(sample, "a"))] <= 2000 # TRUE -#' lookup(sample, "b")[which.min(lookup(sample, "b"))] >= 0 # TRUE -#' lookup(sample, "b")[which.max(lookup(sample, "b"))] <= 2000 # TRUE -#' lookup(sample, "c")[which.min(lookup(sample, "c"))] >= 0 # TRUE -#' lookup(sample, "c")[which.max(lookup(sample, "c"))] <= 2000 # TRUE -#' fractions <- list(a = 0.2, b = 0.1, c = 0.3, d = 0.4) -#' sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # Key "d" will be ignored -#' fractions <- list(a = 0.2, b = 0.1) -#' sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # KeyError: "c" -#'} -#' @rdname sampleByKey -#' @aliases sampleByKey,RDD-method +# Return a subset of this RDD sampled by key. +# +# @description +# \code{sampleByKey} Create a sample of this RDD using variable sampling rates +# for different keys as specified by fractions, a key to sampling rate map. +# +# @param x The RDD to sample elements by key, where each element is +# list(K, V) or c(K, V). 
+# @param withReplacement Sampling with replacement or not +# @param fraction The (rough) sample target fraction +# @param seed Randomness seed value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3000) +# pairs <- lapply(rdd, function(x) { if (x %% 3 == 0) list("a", x) +# else { if (x %% 3 == 1) list("b", x) else list("c", x) }}) +# fractions <- list(a = 0.2, b = 0.1, c = 0.3) +# sample <- sampleByKey(pairs, FALSE, fractions, 1618L) +# 100 < length(lookup(sample, "a")) && 300 > length(lookup(sample, "a")) # TRUE +# 50 < length(lookup(sample, "b")) && 150 > length(lookup(sample, "b")) # TRUE +# 200 < length(lookup(sample, "c")) && 400 > length(lookup(sample, "c")) # TRUE +# lookup(sample, "a")[which.min(lookup(sample, "a"))] >= 0 # TRUE +# lookup(sample, "a")[which.max(lookup(sample, "a"))] <= 2000 # TRUE +# lookup(sample, "b")[which.min(lookup(sample, "b"))] >= 0 # TRUE +# lookup(sample, "b")[which.max(lookup(sample, "b"))] <= 2000 # TRUE +# lookup(sample, "c")[which.min(lookup(sample, "c"))] >= 0 # TRUE +# lookup(sample, "c")[which.max(lookup(sample, "c"))] <= 2000 # TRUE +# fractions <- list(a = 0.2, b = 0.1, c = 0.3, d = 0.4) +# sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # Key "d" will be ignored +# fractions <- list(a = 0.2, b = 0.1) +# sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # KeyError: "c" +#} +# @rdname sampleByKey +# @aliases sampleByKey,RDD-method setMethod("sampleByKey", signature(x = "RDD", withReplacement = "logical", fractions = "vector", seed = "integer"), From 35d6a99cbe3f67da5d56888e63baf9bc69f3de91 Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski <lewandowski.jacek@gmail.com> Date: Fri, 8 May 2015 11:36:30 -0700 Subject: [PATCH 038/320] [SPARK-7436] Fixed instantiation of custom recovery mode factory and added tests Author: Jacek Lewandowski <lewandowski.jacek@gmail.com> Closes #5977 from jacek-lewandowski/SPARK-7436 and squashes the following commits: ff0a3c2 [Jacek Lewandowski] SPARK-7436: Fixed instantiation of custom recovery mode factory and added tests --- .../apache/spark/deploy/master/Master.scala | 2 +- .../master/CustomRecoveryModeFactory.scala | 110 ++++++++++++++++++ .../spark/deploy/master/MasterSuite.scala | 100 +++++++++++++++- 3 files changed, 208 insertions(+), 4 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/deploy/master/CustomRecoveryModeFactory.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 53e1903a3d125..fccceb3ea528b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -165,7 +165,7 @@ private[master] class Master( (fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this)) case "CUSTOM" => val clazz = Class.forName(conf.get("spark.deploy.recoveryMode.factory")) - val factory = clazz.getConstructor(conf.getClass, Serialization.getClass) + val factory = clazz.getConstructor(classOf[SparkConf], classOf[Serialization]) .newInstance(conf, SerializationExtension(context.system)) .asInstanceOf[StandaloneRecoveryModeFactory] (factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this)) diff --git a/core/src/test/scala/org/apache/spark/deploy/master/CustomRecoveryModeFactory.scala b/core/src/test/scala/org/apache/spark/deploy/master/CustomRecoveryModeFactory.scala new file mode 100644 index 0000000000000..f4e56632e426a --- /dev/null +++ 
b/core/src/test/scala/org/apache/spark/deploy/master/CustomRecoveryModeFactory.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This file is placed in different package to make sure all of these components work well +// when they are outside of org.apache.spark. +package other.supplier + +import scala.collection.mutable +import scala.reflect.ClassTag + +import akka.serialization.Serialization + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.master._ + +class CustomRecoveryModeFactory( + conf: SparkConf, + serialization: Serialization +) extends StandaloneRecoveryModeFactory(conf, serialization) { + + CustomRecoveryModeFactory.instantiationAttempts += 1 + + /** + * PersistenceEngine defines how the persistent data(Information about worker, driver etc..) + * is handled for recovery. + * + */ + override def createPersistenceEngine(): PersistenceEngine = + new CustomPersistenceEngine(serialization) + + /** + * Create an instance of LeaderAgent that decides who gets elected as master. + */ + override def createLeaderElectionAgent(master: LeaderElectable): LeaderElectionAgent = + new CustomLeaderElectionAgent(master) +} + +object CustomRecoveryModeFactory { + @volatile var instantiationAttempts = 0 +} + +class CustomPersistenceEngine(serialization: Serialization) extends PersistenceEngine { + val data = mutable.HashMap[String, Array[Byte]]() + + CustomPersistenceEngine.lastInstance = Some(this) + + /** + * Defines how the object is serialized and persisted. Implementation will + * depend on the store used. + */ + override def persist(name: String, obj: Object): Unit = { + CustomPersistenceEngine.persistAttempts += 1 + serialization.serialize(obj) match { + case util.Success(bytes) => data += name -> bytes + case util.Failure(cause) => throw new RuntimeException(cause) + } + } + + /** + * Defines how the object referred by its name is removed from the store. + */ + override def unpersist(name: String): Unit = { + CustomPersistenceEngine.unpersistAttempts += 1 + data -= name + } + + /** + * Gives all objects, matching a prefix. This defines how objects are + * read/deserialized back. 
+ */ + override def read[T: ClassTag](prefix: String): Seq[T] = { + CustomPersistenceEngine.readAttempts += 1 + val clazz = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + val results = for ((name, bytes) <- data; if name.startsWith(prefix)) + yield serialization.deserialize(bytes, clazz) + + results.find(_.isFailure).foreach { + case util.Failure(cause) => throw new RuntimeException(cause) + } + + results.flatMap(_.toOption).toSeq + } +} + +object CustomPersistenceEngine { + @volatile var persistAttempts = 0 + @volatile var unpersistAttempts = 0 + @volatile var readAttempts = 0 + + @volatile var lastInstance: Option[CustomPersistenceEngine] = None +} + +class CustomLeaderElectionAgent(val masterActor: LeaderElectable) extends LeaderElectionAgent { + masterActor.electedLeader() +} + diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 34c74d87f0a62..0faa8f650e5e1 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -17,12 +17,20 @@ package org.apache.spark.deploy.master +import java.util.Date + +import scala.concurrent.Await +import scala.concurrent.duration._ +import scala.language.postfixOps + import akka.actor.Address -import org.scalatest.FunSuite +import org.scalatest.{FunSuite, Matchers} +import other.supplier.{CustomPersistenceEngine, CustomRecoveryModeFactory} -import org.apache.spark.{SSLOptions, SparkConf, SparkException} +import org.apache.spark.deploy._ +import org.apache.spark.{SparkConf, SparkException} -class MasterSuite extends FunSuite { +class MasterSuite extends FunSuite with Matchers { test("toAkkaUrl") { val conf = new SparkConf(loadDefaults = false) @@ -63,4 +71,90 @@ class MasterSuite extends FunSuite { } assert("Invalid master URL: spark://1.2. 
3.4:1234" === e.getMessage) } + + test("can use a custom recovery mode factory") { + val conf = new SparkConf(loadDefaults = false) + conf.set("spark.deploy.recoveryMode", "CUSTOM") + conf.set("spark.deploy.recoveryMode.factory", + classOf[CustomRecoveryModeFactory].getCanonicalName) + + val instantiationAttempts = CustomRecoveryModeFactory.instantiationAttempts + + val commandToPersist = new Command( + mainClass = "", + arguments = Nil, + environment = Map.empty, + classPathEntries = Nil, + libraryPathEntries = Nil, + javaOpts = Nil + ) + + val appToPersist = new ApplicationInfo( + startTime = 0, + id = "test_app", + desc = new ApplicationDescription( + name = "", + maxCores = None, + memoryPerExecutorMB = 0, + command = commandToPersist, + appUiUrl = "", + eventLogDir = None, + eventLogCodec = None, + coresPerExecutor = None), + submitDate = new Date(), + driver = null, + defaultCores = 0 + ) + + val driverToPersist = new DriverInfo( + startTime = 0, + id = "test_driver", + desc = new DriverDescription( + jarUrl = "", + mem = 0, + cores = 0, + supervise = false, + command = commandToPersist + ), + submitDate = new Date() + ) + + val workerToPersist = new WorkerInfo( + id = "test_worker", + host = "127.0.0.1", + port = 10000, + cores = 0, + memory = 0, + actor = null, + webUiPort = 0, + publicAddress = "" + ) + + val (actorSystem, port, uiPort, restPort) = + Master.startSystemAndActor("127.0.0.1", 7077, 8080, conf) + + try { + Await.result(actorSystem.actorSelection("/user/Master").resolveOne(10 seconds), 10 seconds) + + CustomPersistenceEngine.lastInstance.isDefined shouldBe true + val persistenceEngine = CustomPersistenceEngine.lastInstance.get + + persistenceEngine.addApplication(appToPersist) + persistenceEngine.addDriver(driverToPersist) + persistenceEngine.addWorker(workerToPersist) + + val (apps, drivers, workers) = persistenceEngine.readPersistedData() + + apps.map(_.id) should contain(appToPersist.id) + drivers.map(_.id) should contain(driverToPersist.id) + workers.map(_.id) should contain(workerToPersist.id) + + } finally { + actorSystem.shutdown() + actorSystem.awaitTermination() + } + + CustomRecoveryModeFactory.instantiationAttempts should be > instantiationAttempts + } + } From a1ec08f7edc8d956afcfbb92d10b26b7619486e8 Mon Sep 17 00:00:00 2001 From: Matei Zaharia <matei@databricks.com> Date: Fri, 8 May 2015 14:41:42 -0400 Subject: [PATCH 039/320] [SPARK-7298] Harmonize style of new visualizations - Colors on the timeline now match the rest of the UI - The expandable buttons to show timeline view, DAG, etc are now more visible - Timeline text is smaller - DAG visualization text and colors are more consistent throughout - Fix some JavaScript style issues - Various small fixes throughout (e.g. 
inconsistent capitalization, some confusing names, HTML escaping, etc) Author: Matei Zaharia <matei@databricks.com> Closes #5942 from mateiz/ui and squashes the following commits: def38d0 [Matei Zaharia] Add some tooltips 4c5a364 [Matei Zaharia] Reduce stage and rank separation slightly 43dcbe3 [Matei Zaharia] Some updates to DAG fac734a [Matei Zaharia] tweaks 6a6705d [Matei Zaharia] More fixes 67629f5 [Matei Zaharia] Various small tweaks --- .../apache/spark/ui/static/spark-dag-viz.css | 60 +++++--- .../apache/spark/ui/static/spark-dag-viz.js | 57 ++++---- .../apache/spark/ui/static/timeline-view.css | 128 +++++++++++------- .../apache/spark/ui/static/timeline-view.js | 4 +- .../org/apache/spark/ui/static/webui.css | 36 +++-- .../scala/org/apache/spark/ui/ToolTips.scala | 19 +++ .../scala/org/apache/spark/ui/UIUtils.scala | 15 +- .../apache/spark/ui/jobs/AllJobsPage.scala | 31 +++-- .../apache/spark/ui/jobs/AllStagesPage.scala | 15 +- .../org/apache/spark/ui/jobs/JobPage.scala | 23 ++-- .../org/apache/spark/ui/jobs/StagePage.scala | 16 +-- .../spark/ui/scope/RDDOperationGraph.scala | 2 +- 12 files changed, 255 insertions(+), 151 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css index 8481710828455..18c72694f3e2d 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css @@ -16,40 +16,51 @@ */ #dag-viz-graph svg path { - stroke: #444444; + stroke: #444; stroke-width: 1.5px; } #dag-viz-graph svg g.cluster rect { - stroke-width: 4px; - stroke-opacity: 0.5; + stroke-width: 1px; +} + +#dag-viz-graph svg g.node circle { + fill: #444; } -#dag-viz-graph svg g.node circle, #dag-viz-graph svg g.node rect { - fill: #444444; + fill: #C3EBFF; + stroke: #3EC0FF; + stroke-width: 1px; +} + +#dag-viz-graph svg g.node.cached circle { + fill: #444; } -#dag-viz-graph svg g.node.cached circle, #dag-viz-graph svg g.node.cached rect { - fill: #FF0000; + fill: #B3F5C5; + stroke: #56F578; + stroke-width: 1px; } /* Job page specific styles */ #dag-viz-graph svg.job marker#marker-arrow path { - fill: #444444; + fill: #333; stroke-width: 0px; } #dag-viz-graph svg.job g.cluster rect { - fill: #FFFFFF; - stroke: #AADFFF; + fill: #A0DFFF; + stroke: #3EC0FF; + stroke-width: 1px; } #dag-viz-graph svg.job g.cluster[id*="stage"] rect { - stroke: #FFDDEE; - stroke-width: 6px; + fill: #FFFFFF; + stroke: #FF99AC; + stroke-width: 1px; } #dag-viz-graph svg.job g#cross-stage-edges path { @@ -57,27 +68,36 @@ } #dag-viz-graph svg.job g.cluster text { - fill: #AAAAAA; + fill: #333; } /* Stage page specific styles */ #dag-viz-graph svg.stage g.cluster rect { - fill: #F0F8FF; - stroke: #AADFFF; + fill: #A0DFFF; + stroke: #3EC0FF; + stroke-width: 1px; } #dag-viz-graph svg.stage g.cluster[id*="stage"] rect { fill: #FFFFFF; - stroke: #FFDDEE; - stroke-width: 6px; + stroke: #FFA6B6; + stroke-width: 1px; } #dag-viz-graph svg.stage g.node g.label text tspan { - fill: #FFFFFF; + fill: #333; } #dag-viz-graph svg.stage g.cluster text { - fill: #444444; - font-weight: bold; + fill: #333; +} + +#dag-viz-graph a, #dag-viz-graph a:hover { + text-decoration: none; +} + +#dag-viz-graph .label { + font-weight: normal; + text-shadow: none; } diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index a0e3e914c2547..764dd2cfcd76f 100644 --- 
a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -52,9 +52,9 @@ */ var VizConstants = { - svgMarginX: 20, - svgMarginY: 20, - stageSep: 50, + svgMarginX: 16, + svgMarginY: 16, + stageSep: 40, graphPrefix: "graph_", nodePrefix: "node_", stagePrefix: "stage_", @@ -63,14 +63,16 @@ var VizConstants = { }; var JobPageVizConstants = { - clusterLabelSize: 11, - stageClusterLabelSize: 14 -} + clusterLabelSize: 12, + stageClusterLabelSize: 14, + rankSep: 40 +}; var StagePageVizConstants = { clusterLabelSize: 14, - stageClusterLabelSize: 18 -} + stageClusterLabelSize: 14, + rankSep: 40 +}; /* * Show or hide the RDD DAG visualization. @@ -149,11 +151,11 @@ function renderDagVizForStage(svgContainer) { var dot = metadata.select(".dot-file").text(); var containerId = VizConstants.graphPrefix + metadata.attr("stage-id"); var container = svgContainer.append("g").attr("id", containerId); - renderDot(dot, container); + renderDot(dot, container, StagePageVizConstants.rankSep); - // Round corners on RDDs + // Round corners on rectangles svgContainer - .selectAll("g.node rect") + .selectAll("rect") .attr("rx", "5") .attr("ry", "5"); } @@ -207,7 +209,13 @@ function renderDagVizForJob(svgContainer) { } // Actually render the stage - renderDot(dot, container); + renderDot(dot, container, JobPageVizConstants.rankSep); + + // Round corners on rectangles + container + .selectAll("rect") + .attr("rx", "4") + .attr("ry", "4"); // If there are any incoming edges into this graph, keep track of them to render // them separately later. Note that we cannot draw them now because we need to @@ -223,12 +231,13 @@ function renderDagVizForJob(svgContainer) { } /* Render the dot file as an SVG in the given container. */ -function renderDot(dot, container) { +function renderDot(dot, container, rankSep) { var escaped_dot = dot .replace(/</g, "<") .replace(/>/g, ">") .replace(/"/g, "\""); var g = graphlibDot.read(escaped_dot); + g.graph().rankSep = rankSep; var renderer = new dagreD3.render(); renderer(container, g); } @@ -248,12 +257,13 @@ function metadataContainer() { return d3.select("#dag-viz-metadata"); } * In general, the clustering support for dagre-d3 is quite limited at this point. 
*/ function drawClusterLabels(svgContainer, forJob) { + var clusterLabelSize, stageClusterLabelSize; if (forJob) { - var clusterLabelSize = JobPageVizConstants.clusterLabelSize; - var stageClusterLabelSize = JobPageVizConstants.stageClusterLabelSize; + clusterLabelSize = JobPageVizConstants.clusterLabelSize; + stageClusterLabelSize = JobPageVizConstants.stageClusterLabelSize; } else { - var clusterLabelSize = StagePageVizConstants.clusterLabelSize; - var stageClusterLabelSize = StagePageVizConstants.stageClusterLabelSize; + clusterLabelSize = StagePageVizConstants.clusterLabelSize; + stageClusterLabelSize = StagePageVizConstants.stageClusterLabelSize; } svgContainer.selectAll("g.cluster").each(function() { var cluster = d3.select(this); @@ -283,7 +293,7 @@ function drawClusterLabel(d3cluster, fontSize) { .attr("x", labelX) .attr("y", labelY) .attr("text-anchor", "end") - .style("font-size", fontSize) + .style("font-size", fontSize + "px") .text(labelText); } @@ -303,12 +313,12 @@ function resizeSvg(svg) { })); var endX = VizConstants.svgMarginX + toFloat(d3.max(allClusters, function(e) { - var t = d3.select(e) + var t = d3.select(e); return getAbsolutePosition(t).x + toFloat(t.attr("width")); })); var endY = VizConstants.svgMarginY + toFloat(d3.max(allClusters, function(e) { - var t = d3.select(e) + var t = d3.select(e); return getAbsolutePosition(t).y + toFloat(t.attr("height")); })); var width = endX - startX; @@ -338,7 +348,7 @@ function drawCrossStageEdges(edges, svgContainer) { if (!dagreD3Marker.empty()) { svgContainer .append(function() { return dagreD3Marker.node().cloneNode(true); }) - .attr("id", "marker-arrow") + .attr("id", "marker-arrow"); svgContainer.selectAll("g > path").attr("marker-end", "url(#marker-arrow)"); svgContainer.selectAll("g.edgePaths def").remove(); // We no longer need these } @@ -394,12 +404,13 @@ function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) { toPos.x += delta; } + var points; if (fromPos.y == toPos.y) { // If they are on the same rank, curve the middle part of the edge // upward a little to avoid interference with things in between // e.g. 
_______ // _____/ \_____ - var points = [ + points = [ [fromPos.x, fromPos.y], [fromPos.x + (toPos.x - fromPos.x) * 0.2, fromPos.y], [fromPos.x + (toPos.x - fromPos.x) * 0.3, fromPos.y - 20], @@ -413,7 +424,7 @@ function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) { // / // | // _____/ - var points = [ + points = [ [fromPos.x, fromPos.y], [fromPos.x + (toPos.x - fromPos.x) * 0.4, fromPos.y], [fromPos.x + (toPos.x - fromPos.x) * 0.6, toPos.y], diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css index 35ef14e5aaf1a..d40de704229c3 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css @@ -23,6 +23,10 @@ div#application-timeline, div#job-timeline { margin-top: 5px; } +.vis.timeline { + line-height: 14px; +} + .vis.timeline div.content { width: 100%; } @@ -32,48 +36,55 @@ div#application-timeline, div#job-timeline { } .vis.timeline .item.stage.succeeded { - background-color: #D5DDF6; + background-color: #A0DFFF; + border-color: #3EC0FF; } .vis.timeline .item.stage.succeeded.selected { - background-color: #D5DDF6; - border-color: #97B0F8; - z-index: auto; + background-color: #A0DFFF; + border-color: #3EC0FF; + z-index: auto; } .legend-area rect.completed-stage-legend { - fill: #D5DDF6; - stroke: #97B0F8; + fill: #A0DFFF; + stroke: #3EC0FF; } .vis.timeline .item.stage.failed { - background-color: #FF5475; + background-color: #FFA1B0; + border-color: #FF4D6D; } .vis.timeline .item.stage.failed.selected { - background-color: #FF5475; - border-color: #97B0F8; - z-index: auto; + background-color: #FFA1B0; + border-color: #FF4D6D; + z-index: auto; } .legend-area rect.failed-stage-legend { - fill: #FF5475; - stroke: #97B0F8; + fill: #FFA1B0; + stroke: #FF4D6D; } .vis.timeline .item.stage.running { - background-color: #FDFFCA; + background-color: #A2FCC0; + border-color: #36F572; } .vis.timeline .item.stage.running.selected { - background-color: #FDFFCA; - border-color: #97B0F8; - z-index: auto; + background-color: #A2FCC0; + border-color: #36F572; + z-index: auto; } .legend-area rect.active-stage-legend { - fill: #FDFFCA; - stroke: #97B0F8; + fill: #A2FCC0; + stroke: #36F572; +} + +.vis.timeline .foreground { + cursor: move; } .vis.timeline .item.job { @@ -81,76 +92,81 @@ div#application-timeline, div#job-timeline { } .vis.timeline .item.job.succeeded { - background-color: #D5DDF6; + background-color: #A0DFFF; + border-color: #3EC0FF; } .vis.timeline .item.job.succeeded.selected { - background-color: #D5DDF6; - border-color: #97B0F8; - z-index: auto; + background-color: #A0DFFF; + border-color: #3EC0FF; + z-index: auto; } .legend-area rect.succeeded-job-legend { - fill: #D5DDF6; - stroke: #97B0F8; + fill: #A0DFFF; + stroke: #3EC0FF; } .vis.timeline .item.job.failed { - background-color: #FF5475; + background-color: #FFA1B0; + border-color: #FF4D6D; } .vis.timeline .item.job.failed.selected { - background-color: #FF5475; - border-color: #97B0F8; - z-index: auto; + background-color: #FFA1B0; + border-color: #FF4D6D; + z-index: auto; } .legend-area rect.failed-job-legend { - fill: #FF5475; - stroke: #97B0F8; + fill: #FFA1B0; + stroke: #FF4D6D; } .vis.timeline .item.job.running { - background-color: #FDFFCA; + background-color: #A2FCC0; + border-color: #36F572; } .vis.timeline .item.job.running.selected { - background-color: #FDFFCA; - border-color: #97B0F8; - z-index: auto; + 
background-color: #A2FCC0; + border-color: #36F572; + z-index: auto; } .legend-area rect.running-job-legend { - fill: #FDFFCA; - stroke: #97B0F8; + fill: #A2FCC0; + stroke: #36F572; } .vis.timeline .item.executor.added { - background-color: #D5DDF6; + background-color: #A0DFFF; + border-color: #3EC0FF; } .legend-area rect.executor-added-legend { - fill: #D5DDF6; - stroke: #97B0F8; + fill: #A0DFFF; + stroke: #3EC0FF; } .vis.timeline .item.executor.removed { - background-color: #EBCA59; + background-color: #FFA1B0; + border-color: #FF4D6D; } .legend-area rect.executor-removed-legend { - fill: #EBCA59; - stroke: #97B0F8; + fill: #FFA1B0; + stroke: #FF4D6D; } .vis.timeline .item.executor.selected { - border-color: #FFC200; - background-color: #FFF785; + background-color: #A2FCC0; + border-color: #36F572; z-index: 2; } -tr.corresponding-item-hover>td, tr.corresponding-item-hover>th { - background-color: #FFE1FA !important; +tr.corresponding-item-hover > td, tr.corresponding-item-hover > th { + background-color: #D6FFE4 !important; } #application-timeline.collapsed { @@ -165,11 +181,15 @@ tr.corresponding-item-hover>td, tr.corresponding-item-hover>th { margin-bottom: 5px; } +.control-panel input[type="checkbox"] { + margin: 0; +} + span.expand-application-timeline, span.expand-job-timeline { cursor: pointer; } -.control-panel input+span { +.control-panel input + span { cursor: pointer; } @@ -180,3 +200,17 @@ span.expand-application-timeline, span.expand-job-timeline { .vis.timeline .item .tooltip-inner { max-width: unset !important; } + +.vispanel.center { + font-size: 12px; + line-height: 12px; +} + +.legend-area text { + fill: #4D4D4D; +} + +.additional-metrics ul { + list-style: none; + margin-left: 15px; +} diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index e4a891d47f035..48fbb33b1155b 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -156,9 +156,9 @@ function setupExecutorEventAction() { function setupZoomable(id, timeline) { $(id + '>input[type="checkbox"]').click(function() { if (this.checked) { - timeline.setOptions({zoomable: false}); - } else { timeline.setOptions({zoomable: true}); + } else { + timeline.setOptions({zoomable: false}); } }); diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index 669ad48937c05..e7c1d475d4e52 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -106,14 +106,18 @@ span.rest-uri { } pre { - font-size: 0.8em; + font-size: 12px; + line-height: 18px; + padding: 6px; + margin: 0; + border-radius: 3px; } .stage-details { max-height: 100px; overflow-y: auto; margin: 0; - transition: max-height 0.5s ease-out, padding 0.5s ease-out; + transition: max-height 0.25s ease-out, padding 0.25s ease-out; } .stage-details.collapsed { @@ -135,7 +139,7 @@ pre { max-height: 300px; overflow-y: auto; margin: 0; - transition: max-height 0.5s ease-out, padding 0.5s ease-out; + transition: max-height 0.25s ease-out, padding 0.25s ease-out; } .stacktrace-details.collapsed { @@ -158,7 +162,7 @@ span.additional-metric-title { } .tooltip { - font-weight: normal; + font-weight: normal; } .arrow-open { @@ -166,9 +170,9 @@ span.additional-metric-title { height: 0; border-left: 5px solid transparent; 
border-right: 5px solid transparent; - border-top: 5px solid black; - float: left; - margin-top: 6px; + border-top: 5px solid #08c; + display: inline-block; + margin-bottom: 2px; } .arrow-closed { @@ -176,8 +180,10 @@ span.additional-metric-title { height: 0; border-top: 5px solid transparent; border-bottom: 5px solid transparent; - border-left: 5px solid black; + border-left: 5px solid #08c; display: inline-block; + margin-left: 2px; + margin-right: 3px; } .version { @@ -196,3 +202,17 @@ span.additional-metric-title { .serialization_time, .getting_result_time { display: none; } + +.accordion-inner { + background: #f5f5f5; +} + +.accordion-inner pre { + border: 0; + padding: 0; + background: none; +} + +a.expandbutton { + cursor: pointer; +} diff --git a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala index 24f3236456248..063e2a1f8b18e 100644 --- a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala +++ b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala @@ -57,4 +57,23 @@ private[spark] object ToolTips { val GC_TIME = """Time that the executor spent paused for Java garbage collection while the task was running.""" + + val JOB_TIMELINE = + """Shows when jobs started and ended and when executors joined or left. Drag to scroll. + Click Enable Zooming and use mouse wheel to zoom in/out.""" + + val STAGE_TIMELINE = + """Shows when stages started and ended and when executors joined or left. Drag to scroll. + Click Enable Zooming and use mouse wheel to zoom in/out.""" + + val JOB_DAG = + """Shows a graph of stages executed for this job, each of which can contain + multiple RDD operations (e.g. map() and filter()), and of RDDs inside each operation + (shown as dots).""" + + val STAGE_DAG = + """Shows a graph of RDD operations in this stage, and RDDs inside each one. A stage can run + multiple operations (e.g. two map() functions) if they can be pipelined. Some operations + also create multiple RDDs internally. Cached RDDs are shown in green. 
+ """ } diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 97eed13c2d780..6a0f5c5d16daa 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -156,10 +156,10 @@ private[spark] object UIUtils extends Logging { def commonHeaderNodes: Seq[Node] = { <meta http-equiv="Content-type" content="text/html; charset=utf-8" /> - <link rel="stylesheet" href={prependBaseUri("/static/bootstrap.min.css")} type="text/css" /> - <link rel="stylesheet" href={prependBaseUri("/static/webui.css")} type="text/css" /> - <link rel="stylesheet" href={prependBaseUri("/static/vis.min.css")} type="text/css" /> - <link rel="stylesheet" href={prependBaseUri("/static/timeline-view.css")} type="text/css" /> + <link rel="stylesheet" href={prependBaseUri("/static/bootstrap.min.css")} type="text/css"/> + <link rel="stylesheet" href={prependBaseUri("/static/vis.min.css")} type="text/css"/> + <link rel="stylesheet" href={prependBaseUri("/static/webui.css")} type="text/css"/> + <link rel="stylesheet" href={prependBaseUri("/static/timeline-view.css")} type="text/css"/> <script src={prependBaseUri("/static/sorttable.js")} ></script> <script src={prependBaseUri("/static/jquery-1.11.1.min.js")}></script> <script src={prependBaseUri("/static/vis.min.js")}></script> @@ -250,7 +250,7 @@ private[spark] object UIUtils extends Logging { <h3 style="vertical-align: middle; display: inline-block;"> <a style="text-decoration: none" href={prependBaseUri("/")}> <img src={prependBaseUri("/static/spark-logo-77x50px-hd.png")} /> - <span class="version" + <span class="version" style="margin-right: 15px;">{org.apache.spark.SPARK_VERSION}</span> </a> {title} @@ -350,7 +350,10 @@ private[spark] object UIUtils extends Logging { <div> <span class="expand-dag-viz" onclick={s"toggleDagViz($forJob);"}> <span class="expand-dag-viz-arrow arrow-closed"></span> - <strong>DAG visualization</strong> + <a data-toggle="tooltip" title={if (forJob) ToolTips.JOB_DAG else ToolTips.STAGE_DAG} + data-placement="right"> + DAG Visualization + </a> </span> <div id="dag-viz-graph"></div> <div id="dag-viz-metadata"> diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index 09323d1d80ad6..e010ebef3b34a 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -18,12 +18,12 @@ package org.apache.spark.ui.jobs import scala.collection.mutable.{HashMap, ListBuffer} -import scala.xml.{Node, NodeSeq, Unparsed} +import scala.xml.{Node, NodeSeq, Unparsed, Utility} import java.util.Date import javax.servlet.http.HttpServletRequest -import org.apache.spark.ui.{UIUtils, WebUIPage} +import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} import org.apache.spark.ui.jobs.UIData.{ExecutorUIData, JobUIData} import org.apache.spark.JobExecutionStatus @@ -81,6 +81,9 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { case JobExecutionStatus.RUNNING => "running" } + // The timeline library treats contents as HTML, so we have to escape them; for the + // data-title attribute string we have to escape them twice since that's in a string. 
+ val escapedDesc = Utility.escape(displayJobDescription) val jobEventJsonAsStr = s""" |{ @@ -90,16 +93,17 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { | 'end': new Date(${completionTime}), | 'content': '<div class="application-timeline-content"' + | 'data-html="true" data-placement="top" data-toggle="tooltip"' + - | 'data-title="${displayJobDescription} (Job ${jobId})<br>Status: ${status}<br>' + - | 'Submission Time: ${UIUtils.formatDate(new Date(submissionTime))}' + + | 'data-title="${Utility.escape(escapedDesc)} (Job ${jobId})<br>' + + | 'Status: ${status}<br>' + + | 'Submitted: ${UIUtils.formatDate(new Date(submissionTime))}' + | '${ if (status != JobExecutionStatus.RUNNING) { - s"""<br>Completion Time: ${UIUtils.formatDate(new Date(completionTime))}""" + s"""<br>Completed: ${UIUtils.formatDate(new Date(completionTime))}""" } else { "" } }">' + - | '${displayJobDescription} (Job ${jobId})</div>' + | '${escapedDesc} (Job ${jobId})</div>' |} """.stripMargin jobEventJsonAsStr @@ -179,13 +183,15 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { <span class="expand-application-timeline"> <span class="expand-application-timeline-arrow arrow-closed"></span> - <strong>Event timeline</strong> + <a data-toggle="tooltip" title={ToolTips.JOB_TIMELINE} data-placement="right"> + Event Timeline + </a> </span> ++ <div id="application-timeline" class="collapsed"> <div class="control-panel"> <div id="application-timeline-zoom-lock"> - <input type="checkbox" checked="checked"></input> - <span>Zoom Lock</span> + <input type="checkbox"></input> + <span>Enable zooming</span> </div> </div> </div> ++ @@ -283,7 +289,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { {if (parent.sc.isDefined) { // Total duration is not meaningful unless the UI is live <li> - <strong>Total Duration: </strong> + <strong>Total Uptime: </strong> {UIUtils.formatDuration(System.currentTimeMillis() - startTime)} </li> }} @@ -336,9 +342,8 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { failedJobsTable } - val helpText = """A job is triggered by an action, like "count()" or "saveAsTextFile()".""" + - " Click on a job's title to see information about the stages of tasks associated with" + - " the job." + val helpText = """A job is triggered by an action, like count() or saveAsTextFile().""" + + " Click on a job to see information about the stages of tasks inside it." 
UIUtils.headerSparkPage("Spark Jobs", content, parent, helpText = Some(helpText)) } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala index a37f739ab9c66..5e52942b64f3f 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala @@ -73,19 +73,6 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { val summary: NodeSeq = <div> <ul class="unstyled"> - { - if (sc.isDefined) { - // Total duration is not meaningful unless the UI is live - <li> - <strong>Total Duration: </strong> - {UIUtils.formatDuration(now - sc.get.startTime)} - </li> - } - } - <li> - <strong>Scheduling Mode: </strong> - {listener.schedulingMode.map(_.toString).getOrElse("Unknown")} - </li> { if (shouldShowActiveStages) { <li> @@ -145,7 +132,7 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { content ++= <h4 id ="failed">Failed Stages ({numFailedStages})</h4> ++ failedStagesTable.toNodeSeq } - UIUtils.headerSparkPage("Spark Stages (for all jobs)", content, parent) + UIUtils.headerSparkPage("Stages for All Jobs", content, parent) } } } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index 7163217e1fed0..2cad0a796913e 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -20,13 +20,13 @@ package org.apache.spark.ui.jobs import java.util.Date import scala.collection.mutable.{Buffer, HashMap, ListBuffer} -import scala.xml.{NodeSeq, Node, Unparsed} +import scala.xml.{NodeSeq, Node, Unparsed, Utility} import javax.servlet.http.HttpServletRequest import org.apache.spark.JobExecutionStatus import org.apache.spark.scheduler.StageInfo -import org.apache.spark.ui.{UIUtils, WebUIPage} +import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} import org.apache.spark.ui.jobs.UIData.ExecutorUIData /** Page showing statistics and stage list for a given job */ @@ -64,6 +64,9 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { val submissionTime = stage.submissionTime.get val completionTime = stage.completionTime.getOrElse(System.currentTimeMillis()) + // The timeline library treats contents as HTML, so we have to escape them; for the + // data-title attribute string we have to escape them twice since that's in a string. 
+ val escapedName = Utility.escape(name) s""" |{ | 'className': 'stage job-timeline-object ${status}', @@ -72,17 +75,17 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { | 'end': new Date(${completionTime}), | 'content': '<div class="job-timeline-content" data-toggle="tooltip"' + | 'data-placement="top" data-html="true"' + - | 'data-title="${name} (Stage ${stageId}.${attemptId})<br>' + + | 'data-title="${Utility.escape(escapedName)} (Stage ${stageId}.${attemptId})<br>' + | 'Status: ${status.toUpperCase}<br>' + - | 'Submission Time: ${UIUtils.formatDate(new Date(submissionTime))}' + + | 'Submitted: ${UIUtils.formatDate(new Date(submissionTime))}' + | '${ if (status != "running") { - s"""<br>Completion Time: ${UIUtils.formatDate(new Date(completionTime))}""" + s"""<br>Completed: ${UIUtils.formatDate(new Date(completionTime))}""" } else { "" } }">' + - | '${name} (Stage ${stageId}.${attemptId})</div>', + | '${escapedName} (Stage ${stageId}.${attemptId})</div>', |} """.stripMargin } @@ -161,13 +164,15 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { <span class="expand-job-timeline"> <span class="expand-job-timeline-arrow arrow-closed"></span> - <strong>Event timeline</strong> + <a data-toggle="tooltip" title={ToolTips.STAGE_TIMELINE} data-placement="right"> + Event Timeline + </a> </span> ++ <div id="job-timeline" class="collapsed"> <div class="control-panel"> <div id="job-timeline-zoom-lock"> - <input type="checkbox" checked="checked"></input> - <span>Zoom Lock</span> + <input type="checkbox"></input> + <span>Enable zooming</span> </div> </div> </div> ++ diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index b01fad8e453c8..8f7b1c2f09665 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -81,7 +81,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { <div> <ul class="unstyled"> <li> - <strong>Total task time across all tasks: </strong> + <strong>Total Time Across All Tasks: </strong> {UIUtils.formatDuration(stageData.executorRunTime)} </li> {if (stageData.hasInput) { @@ -98,25 +98,25 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { }} {if (stageData.hasShuffleRead) { <li> - <strong>Shuffle read: </strong> + <strong>Shuffle Read: </strong> {s"${Utils.bytesToString(stageData.shuffleReadTotalBytes)} / " + s"${stageData.shuffleReadRecords}"} </li> }} {if (stageData.hasShuffleWrite) { <li> - <strong>Shuffle write: </strong> + <strong>Shuffle Write: </strong> {s"${Utils.bytesToString(stageData.shuffleWriteBytes)} / " + s"${stageData.shuffleWriteRecords}"} </li> }} {if (stageData.hasBytesSpilled) { <li> - <strong>Shuffle spill (memory): </strong> + <strong>Shuffle Spill (Memory): </strong> {Utils.bytesToString(stageData.memoryBytesSpilled)} </li> <li> - <strong>Shuffle spill (disk): </strong> + <strong>Shuffle Spill (Disk): </strong> {Utils.bytesToString(stageData.diskBytesSpilled)} </li> }} @@ -127,10 +127,10 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { <div> <span class="expand-additional-metrics"> <span class="expand-additional-metrics-arrow arrow-closed"></span> - <strong>Show additional metrics</strong> + <a>Show Additional Metrics</a> </span> <div class="additional-metrics collapsed"> - <ul style="list-style-type:none"> + <ul> <li> <input type="checkbox" id="select-all-metrics"/> 
<span class="additional-metric-title"><em>(De)select All</em></span> @@ -457,9 +457,9 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val content = summary ++ - showAdditionalMetrics ++ dagViz ++ maybeExpandDagViz ++ + showAdditionalMetrics ++ <h4>Summary Metrics for {numCompleted} Completed Tasks</h4> ++ <div>{summaryTable.getOrElse("No tasks have reported metrics yet.")}</div> ++ <h4>Aggregated Metrics by Executor</h4> ++ executorTable.toNodeSeq ++ diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala index 2b2db9e62be4e..c7045c98c8a03 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala @@ -182,7 +182,7 @@ private[ui] object RDDOperationGraph extends Logging { if (forJob) { s"""${node.id} [label="$label" shape="circle" padding="5" labelStyle="font-size: 0"]""" } else { - s"""${node.id} [label="$label" padding="5" labelStyle="font-size: 10"]""" + s"""${node.id} [label="$label" padding="5" labelStyle="font-size: 12px"]""" } } From 2d05f325dc3c70349bd17ed399897f22d967c687 Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Fri, 8 May 2015 11:49:38 -0700 Subject: [PATCH 040/320] [SPARK-7133] [SQL] Implement struct, array, and map field accessor It's the first step: generalize UnresolvedGetField to support all map, struct, and array TODO: add `apply` in Scala and `__getitem__` in Python, and unify the `getItem` and `getField` methods to one single API(or should we keep them for compatibility?). Author: Wenchen Fan <cloud0fan@outlook.com> Closes #5744 from cloud-fan/generalize and squashes the following commits: 715c589 [Wenchen Fan] address comments 7ea5b31 [Wenchen Fan] fix python test 4f0833a [Wenchen Fan] add python test f515d69 [Wenchen Fan] add apply method and test cases 8df6199 [Wenchen Fan] fix python test 239730c [Wenchen Fan] fix test compile 2a70526 [Wenchen Fan] use _bin_op in dataframe.py 6bf72bc [Wenchen Fan] address comments 3f880c3 [Wenchen Fan] add java doc ab35ab5 [Wenchen Fan] fix python test b5961a9 [Wenchen Fan] fix style c9d85f5 [Wenchen Fan] generalize UnresolvedGetField to support all map, struct, and array --- python/pyspark/sql/dataframe.py | 24 +- python/pyspark/sql/tests.py | 7 + .../apache/spark/sql/catalyst/SqlParser.scala | 4 +- .../sql/catalyst/analysis/Analyzer.scala | 4 +- .../sql/catalyst/analysis/unresolved.scala | 14 +- .../spark/sql/catalyst/dsl/package.scala | 7 +- .../catalyst/expressions/ExtractValue.scala | 206 ++++++++++++++++++ .../catalyst/expressions/complexTypes.scala | 131 ----------- .../sql/catalyst/optimizer/Optimizer.scala | 6 +- .../sql/catalyst/planning/patterns.scala | 2 +- .../catalyst/plans/logical/LogicalPlan.scala | 3 +- .../ExpressionEvaluationSuite.scala | 69 ++++-- .../optimizer/ConstantFoldingSuite.scala | 8 +- .../scala/org/apache/spark/sql/Column.scala | 19 +- .../org/apache/spark/sql/DataFrameSuite.scala | 10 +- .../org/apache/spark/sql/hive/HiveQl.scala | 4 +- 16 files changed, 327 insertions(+), 191 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index cee804f5cc1f7..a9697999e82cb 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1275,7 +1275,7 @@ def __init__(self, jc): # container 
operators __contains__ = _bin_op("contains") - __getitem__ = _bin_op("getItem") + __getitem__ = _bin_op("apply") # bitwise operators bitwiseOR = _bin_op("bitwiseOR") @@ -1308,19 +1308,19 @@ def getField(self, name): >>> from pyspark.sql import Row >>> df = sc.parallelize([Row(r=Row(a=1, b="b"))]).toDF() >>> df.select(df.r.getField("b")).show() - +---+ - |r.b| - +---+ - | b| - +---+ + +----+ + |r[b]| + +----+ + | b| + +----+ >>> df.select(df.r.a).show() - +---+ - |r.a| - +---+ - | 1| - +---+ + +----+ + |r[a]| + +----+ + | 1| + +----+ """ - return Column(self._jc.getField(name)) + return self[name] def __getattr__(self, item): if item.startswith("__"): diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 45dfedce22add..7e63f4d6461f6 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -519,6 +519,13 @@ def test_access_nested_types(self): self.assertEqual("v", df.select(df.d["k"]).first()[0]) self.assertEqual("v", df.select(df.d.getItem("k")).first()[0]) + def test_field_accessor(self): + df = self.sc.parallelize([Row(l=[1], r=Row(a=1, b="b"), d={"k": "v"})]).toDF() + self.assertEqual(1, df.select(df.l[0]).first()[0]) + self.assertEqual(1, df.select(df.r["a"]).first()[0]) + self.assertEqual("b", df.select(df.r["b"]).first()[0]) + self.assertEqual("v", df.select(df.d["k"]).first()[0]) + def test_infer_long_type(self): longrow = [Row(f1='a', f2=100000000000000)] df = self.sc.parallelize(longrow).toDF() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index b06bfb2ce8c8e..fc36b9f1f20d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -375,9 +375,9 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser { protected lazy val primary: PackratParser[Expression] = ( literal | expression ~ ("[" ~> expression <~ "]") ^^ - { case base ~ ordinal => GetItem(base, ordinal) } + { case base ~ ordinal => UnresolvedExtractValue(base, ordinal) } | (expression <~ ".") ~ ident ^^ - { case base ~ fieldName => UnresolvedGetField(base, fieldName) } + { case base ~ fieldName => UnresolvedExtractValue(base, Literal(fieldName)) } | cast | "(" ~> expression <~ ")" | function diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index bb7913e186a85..ecbac57ea4d62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -348,8 +348,8 @@ class Analyzer( withPosition(u) { q.resolveChildren(nameParts, resolver).getOrElse(u) } logDebug(s"Resolving $u to $result") result - case UnresolvedGetField(child, fieldName) if child.resolved => - GetField(child, fieldName, resolver) + case UnresolvedExtractValue(child, fieldExpr) if child.resolved => + ExtractValue(child, fieldExpr, resolver) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index eb736ac3290e8..2999c2ef3efe1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala 
@@ -184,7 +184,17 @@ case class ResolvedStar(expressions: Seq[NamedExpression]) extends Star { override def toString: String = expressions.mkString("ResolvedStar(", ", ", ")") } -case class UnresolvedGetField(child: Expression, fieldName: String) extends UnaryExpression { +/** + * Extracts a value or values from an Expression + * + * @param child The expression to extract value from, + * can be Map, Array, Struct or array of Structs. + * @param extraction The expression to describe the extraction, + * can be key of Map, index of Array, field name of Struct. + */ +case class UnresolvedExtractValue(child: Expression, extraction: Expression) + extends UnaryExpression { + override def dataType: DataType = throw new UnresolvedException(this, "dataType") override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") @@ -193,5 +203,5 @@ case class UnresolvedGetField(child: Expression, fieldName: String) extends Unar override def eval(input: Row = null): EvaluatedType = throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}") - override def toString: String = s"$child.$fieldName" + override def toString: String = s"$child[$extraction]" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index fa6cc7a1a36cf..4c0d70203c6f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -22,7 +22,7 @@ import java.sql.{Date, Timestamp} import scala.language.implicitConversions import scala.reflect.runtime.universe.{TypeTag, typeTag} -import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedGetField, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedExtractValue, UnresolvedAttribute} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} @@ -100,8 +100,9 @@ package object dsl { def isNull: Predicate = IsNull(expr) def isNotNull: Predicate = IsNotNull(expr) - def getItem(ordinal: Expression): Expression = GetItem(expr, ordinal) - def getField(fieldName: String): UnresolvedGetField = UnresolvedGetField(expr, fieldName) + def getItem(ordinal: Expression): UnresolvedExtractValue = UnresolvedExtractValue(expr, ordinal) + def getField(fieldName: String): UnresolvedExtractValue = + UnresolvedExtractValue(expr, Literal(fieldName)) def cast(to: DataType): Expression = Cast(expr, to) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala new file mode 100644 index 0000000000000..e05926cbfe74b --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import scala.collection.Map + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.types._ + +object ExtractValue { + /** + * Returns the resolved `ExtractValue`. It will return one kind of concrete `ExtractValue`, + * depend on the type of `child` and `extraction`. + * + * `child` | `extraction` | concrete `ExtractValue` + * ---------------------------------------------------------------- + * Struct | Literal String | GetStructField + * Array[Struct] | Literal String | GetArrayStructFields + * Array | Integral type | GetArrayItem + * Map | Any type | GetMapValue + */ + def apply( + child: Expression, + extraction: Expression, + resolver: Resolver): ExtractValue = { + + (child.dataType, extraction) match { + case (StructType(fields), Literal(fieldName, StringType)) => + val ordinal = findField(fields, fieldName.toString, resolver) + GetStructField(child, fields(ordinal), ordinal) + case (ArrayType(StructType(fields), containsNull), Literal(fieldName, StringType)) => + val ordinal = findField(fields, fieldName.toString, resolver) + GetArrayStructFields(child, fields(ordinal), ordinal, containsNull) + case (_: ArrayType, _) if extraction.dataType.isInstanceOf[IntegralType] => + GetArrayItem(child, extraction) + case (_: MapType, _) => + GetMapValue(child, extraction) + case (otherType, _) => + val errorMsg = otherType match { + case StructType(_) | ArrayType(StructType(_), _) => + s"Field name should be String Literal, but it's $extraction" + case _: ArrayType => + s"Array index should be integral type, but it's ${extraction.dataType}" + case other => + s"Can't extract value from $child" + } + throw new AnalysisException(errorMsg) + } + } + + def unapply(g: ExtractValue): Option[(Expression, Expression)] = { + g match { + case o: ExtractValueWithOrdinal => Some((o.child, o.ordinal)) + case _ => Some((g.child, null)) + } + } + + /** + * Find the ordinal of StructField, report error if no desired field or over one + * desired fields are found. + */ + private def findField(fields: Array[StructField], fieldName: String, resolver: Resolver): Int = { + val checkField = (f: StructField) => resolver(f.name, fieldName) + val ordinal = fields.indexWhere(checkField) + if (ordinal == -1) { + throw new AnalysisException( + s"No such struct field $fieldName in ${fields.map(_.name).mkString(", ")}") + } else if (fields.indexWhere(checkField, ordinal + 1) != -1) { + throw new AnalysisException( + s"Ambiguous reference to fields ${fields.filter(checkField).mkString(", ")}") + } else { + ordinal + } + } +} + +trait ExtractValue extends UnaryExpression { + self: Product => + + type EvaluatedType = Any +} + +/** + * Returns the value of fields in the Struct `child`. 
+ */ +case class GetStructField(child: Expression, field: StructField, ordinal: Int) + extends ExtractValue { + + override def dataType: DataType = field.dataType + override def nullable: Boolean = child.nullable || field.nullable + override def foldable: Boolean = child.foldable + override def toString: String = s"$child.${field.name}" + + override def eval(input: Row): Any = { + val baseValue = child.eval(input).asInstanceOf[Row] + if (baseValue == null) null else baseValue(ordinal) + } +} + +/** + * Returns the array of value of fields in the Array of Struct `child`. + */ +case class GetArrayStructFields( + child: Expression, + field: StructField, + ordinal: Int, + containsNull: Boolean) extends ExtractValue { + + override def dataType: DataType = ArrayType(field.dataType, containsNull) + override def nullable: Boolean = child.nullable + override def foldable: Boolean = child.foldable + override def toString: String = s"$child.${field.name}" + + override def eval(input: Row): Any = { + val baseValue = child.eval(input).asInstanceOf[Seq[Row]] + if (baseValue == null) null else { + baseValue.map { row => + if (row == null) null else row(ordinal) + } + } + } +} + +abstract class ExtractValueWithOrdinal extends ExtractValue { + self: Product => + + def ordinal: Expression + + /** `Null` is returned for invalid ordinals. */ + override def nullable: Boolean = true + override def foldable: Boolean = child.foldable && ordinal.foldable + override def toString: String = s"$child[$ordinal]" + override def children: Seq[Expression] = child :: ordinal :: Nil + + override def eval(input: Row): Any = { + val value = child.eval(input) + if (value == null) { + null + } else { + val o = ordinal.eval(input) + if (o == null) { + null + } else { + evalNotNull(value, o) + } + } + } + + protected def evalNotNull(value: Any, ordinal: Any): Any +} + +/** + * Returns the field at `ordinal` in the Array `child` + */ +case class GetArrayItem(child: Expression, ordinal: Expression) + extends ExtractValueWithOrdinal { + + override def dataType: DataType = child.dataType.asInstanceOf[ArrayType].elementType + + override lazy val resolved = childrenResolved && + child.dataType.isInstanceOf[ArrayType] && ordinal.dataType.isInstanceOf[IntegralType] + + protected def evalNotNull(value: Any, ordinal: Any) = { + // TODO: consider using Array[_] for ArrayType child to avoid + // boxing of primitives + val baseValue = value.asInstanceOf[Seq[_]] + val index = ordinal.asInstanceOf[Int] + if (index >= baseValue.size || index < 0) { + null + } else { + baseValue(index) + } + } +} + +/** + * Returns the value of key `ordinal` in Map `child` + */ +case class GetMapValue(child: Expression, ordinal: Expression) + extends ExtractValueWithOrdinal { + + override def dataType: DataType = child.dataType.asInstanceOf[MapType].valueType + + override lazy val resolved = childrenResolved && child.dataType.isInstanceOf[MapType] + + protected def evalNotNull(value: Any, ordinal: Any) = { + val baseValue = value.asInstanceOf[Map[Any, _]] + baseValue.get(ordinal).orNull + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala index fc1f69655963d..956a2429b0b61 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala @@ -17,139 +17,8 @@ package 
org.apache.spark.sql.catalyst.expressions -import scala.collection.Map - -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.types._ -/** - * Returns the item at `ordinal` in the Array `child` or the Key `ordinal` in Map `child`. - */ -case class GetItem(child: Expression, ordinal: Expression) extends Expression { - type EvaluatedType = Any - - val children: Seq[Expression] = child :: ordinal :: Nil - /** `Null` is returned for invalid ordinals. */ - override def nullable: Boolean = true - override def foldable: Boolean = child.foldable && ordinal.foldable - - override def dataType: DataType = child.dataType match { - case ArrayType(dt, _) => dt - case MapType(_, vt, _) => vt - } - override lazy val resolved = - childrenResolved && - (child.dataType.isInstanceOf[ArrayType] || child.dataType.isInstanceOf[MapType]) - - override def toString: String = s"$child[$ordinal]" - - override def eval(input: Row): Any = { - val value = child.eval(input) - if (value == null) { - null - } else { - val key = ordinal.eval(input) - if (key == null) { - null - } else { - if (child.dataType.isInstanceOf[ArrayType]) { - // TODO: consider using Array[_] for ArrayType child to avoid - // boxing of primitives - val baseValue = value.asInstanceOf[Seq[_]] - val o = key.asInstanceOf[Int] - if (o >= baseValue.size || o < 0) { - null - } else { - baseValue(o) - } - } else { - val baseValue = value.asInstanceOf[Map[Any, _]] - baseValue.get(key).orNull - } - } - } - } -} - - -trait GetField extends UnaryExpression { - self: Product => - - type EvaluatedType = Any - override def foldable: Boolean = child.foldable - override def toString: String = s"$child.${field.name}" - - def field: StructField -} - -object GetField { - /** - * Returns the resolved `GetField`, and report error if no desired field or over one - * desired fields are found. - */ - def apply( - expr: Expression, - fieldName: String, - resolver: Resolver): GetField = { - def findField(fields: Array[StructField]): Int = { - val checkField = (f: StructField) => resolver(f.name, fieldName) - val ordinal = fields.indexWhere(checkField) - if (ordinal == -1) { - throw new AnalysisException( - s"No such struct field $fieldName in ${fields.map(_.name).mkString(", ")}") - } else if (fields.indexWhere(checkField, ordinal + 1) != -1) { - throw new AnalysisException( - s"Ambiguous reference to fields ${fields.filter(checkField).mkString(", ")}") - } else { - ordinal - } - } - expr.dataType match { - case StructType(fields) => - val ordinal = findField(fields) - StructGetField(expr, fields(ordinal), ordinal) - case ArrayType(StructType(fields), containsNull) => - val ordinal = findField(fields) - ArrayGetField(expr, fields(ordinal), ordinal, containsNull) - case otherType => - throw new AnalysisException(s"GetField is not valid on fields of type $otherType") - } - } -} - -/** - * Returns the value of fields in the Struct `child`. - */ -case class StructGetField(child: Expression, field: StructField, ordinal: Int) extends GetField { - - override def dataType: DataType = field.dataType - override def nullable: Boolean = child.nullable || field.nullable - - override def eval(input: Row): Any = { - val baseValue = child.eval(input).asInstanceOf[Row] - if (baseValue == null) null else baseValue(ordinal) - } -} - -/** - * Returns the array of value of fields in the Array of Struct `child`. 
- */ -case class ArrayGetField(child: Expression, field: StructField, ordinal: Int, containsNull: Boolean) - extends GetField { - - override def dataType: DataType = ArrayType(field.dataType, containsNull) - override def nullable: Boolean = child.nullable - - override def eval(input: Row): Any = { - val baseValue = child.eval(input).asInstanceOf[Seq[Row]] - if (baseValue == null) null else { - baseValue.map { row => - if (row == null) null else row(ordinal) - } - } - } -} /** * Returns an Array containing the evaluation of all children expressions. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index e4a60f53d6c09..d7b2f203a6934 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -227,10 +227,8 @@ object NullPropagation extends Rule[LogicalPlan] { case e @ Count(Literal(null, _)) => Cast(Literal(0L), e.dataType) case e @ IsNull(c) if !c.nullable => Literal.create(false, BooleanType) case e @ IsNotNull(c) if !c.nullable => Literal.create(true, BooleanType) - case e @ GetItem(Literal(null, _), _) => Literal.create(null, e.dataType) - case e @ GetItem(_, Literal(null, _)) => Literal.create(null, e.dataType) - case e @ StructGetField(Literal(null, _), _, _) => Literal.create(null, e.dataType) - case e @ ArrayGetField(Literal(null, _), _, _, _) => Literal.create(null, e.dataType) + case e @ ExtractValue(Literal(null, _), _) => Literal.create(null, e.dataType) + case e @ ExtractValue(_, Literal(null, _)) => Literal.create(null, e.dataType) case e @ EqualNullSafe(Literal(null, _), r) => IsNull(r) case e @ EqualNullSafe(l, Literal(null, _)) => IsNull(l) case e @ Count(expr) if !expr.nullable => Count(Literal(1)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index 4574934d910db..cd54d04814ea4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -160,7 +160,7 @@ object PartialAggregation { // resolving struct field accesses, because `GetField` is not a `NamedExpression`. // (Should we just turn `GetField` into a `NamedExpression`?) namedGroupingExpressions - .get(e.transform { case Alias(g: GetField, _) => g }) + .get(e.transform { case Alias(g: ExtractValue, _) => g }) .map(_.toAttribute) .getOrElse(e) }).asInstanceOf[Seq[NamedExpression]] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index ae4620a4e5abf..dbb12d56f9497 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -209,7 +209,8 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { // For example, consider "a.b.c", where "a" is resolved to an existing attribute. // Then this will add GetField("c", GetField("b", a)), and alias // the final expression as "c". 
- val fieldExprs = nestedFields.foldLeft(a: Expression)(GetField(_, _, resolver)) + val fieldExprs = nestedFields.foldLeft(a: Expression)((expr, fieldName) => + ExtractValue(expr, Literal(fieldName), resolver)) val aliasName = nestedFields.last Some(Alias(fieldExprs, aliasName)()) } catch { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index 88d36d153ceea..04fd261d16aa3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -26,7 +26,7 @@ import org.scalatest.FunSuite import org.scalatest.Matchers._ import org.apache.spark.sql.catalyst.CatalystTypeConverters -import org.apache.spark.sql.catalyst.analysis.UnresolvedGetField +import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.mathfuncs._ import org.apache.spark.sql.types._ @@ -880,7 +880,7 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { val row = create_row( "^Ba*n", // 0 null.asInstanceOf[UTF8String], // 1 - create_row("aa", "bb"), // 2 + create_row("aa", "bb"), // 2 Map("aa"->"bb"), // 3 Seq("aa", "bb") // 4 ) @@ -891,54 +891,79 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { val typeMap = MapType(StringType, StringType) val typeArray = ArrayType(StringType) - checkEvaluation(GetItem(BoundReference(3, typeMap, true), + checkEvaluation(GetMapValue(BoundReference(3, typeMap, true), Literal("aa")), "bb", row) - checkEvaluation(GetItem(Literal.create(null, typeMap), Literal("aa")), null, row) + checkEvaluation(GetMapValue(Literal.create(null, typeMap), Literal("aa")), null, row) checkEvaluation( - GetItem(Literal.create(null, typeMap), Literal.create(null, StringType)), null, row) - checkEvaluation(GetItem(BoundReference(3, typeMap, true), + GetMapValue(Literal.create(null, typeMap), Literal.create(null, StringType)), null, row) + checkEvaluation(GetMapValue(BoundReference(3, typeMap, true), Literal.create(null, StringType)), null, row) - checkEvaluation(GetItem(BoundReference(4, typeArray, true), + checkEvaluation(GetArrayItem(BoundReference(4, typeArray, true), Literal(1)), "bb", row) - checkEvaluation(GetItem(Literal.create(null, typeArray), Literal(1)), null, row) + checkEvaluation(GetArrayItem(Literal.create(null, typeArray), Literal(1)), null, row) checkEvaluation( - GetItem(Literal.create(null, typeArray), Literal.create(null, IntegerType)), null, row) - checkEvaluation(GetItem(BoundReference(4, typeArray, true), + GetArrayItem(Literal.create(null, typeArray), Literal.create(null, IntegerType)), null, row) + checkEvaluation(GetArrayItem(BoundReference(4, typeArray, true), Literal.create(null, IntegerType)), null, row) - def quickBuildGetField(expr: Expression, fieldName: String): StructGetField = { + def getStructField(expr: Expression, fieldName: String): ExtractValue = { expr.dataType match { case StructType(fields) => val field = fields.find(_.name == fieldName).get - StructGetField(expr, field, fields.indexOf(field)) + GetStructField(expr, field, fields.indexOf(field)) } } - def quickResolve(u: UnresolvedGetField): StructGetField = { - quickBuildGetField(u.child, u.fieldName) + def quickResolve(u: UnresolvedExtractValue): 
ExtractValue = { + ExtractValue(u.child, u.extraction, _ == _) } - checkEvaluation(quickBuildGetField(BoundReference(2, typeS, nullable = true), "a"), "aa", row) - checkEvaluation(quickBuildGetField(Literal.create(null, typeS), "a"), null, row) + checkEvaluation(getStructField(BoundReference(2, typeS, nullable = true), "a"), "aa", row) + checkEvaluation(getStructField(Literal.create(null, typeS), "a"), null, row) val typeS_notNullable = StructType( StructField("a", StringType, nullable = false) :: StructField("b", StringType, nullable = false) :: Nil ) - assert(quickBuildGetField(BoundReference(2,typeS, nullable = true), "a").nullable === true) - assert(quickBuildGetField(BoundReference(2, typeS_notNullable, nullable = false), "a").nullable + assert(getStructField(BoundReference(2,typeS, nullable = true), "a").nullable === true) + assert(getStructField(BoundReference(2, typeS_notNullable, nullable = false), "a").nullable === false) - assert(quickBuildGetField(Literal.create(null, typeS), "a").nullable === true) - assert(quickBuildGetField(Literal.create(null, typeS_notNullable), "a").nullable === true) + assert(getStructField(Literal.create(null, typeS), "a").nullable === true) + assert(getStructField(Literal.create(null, typeS_notNullable), "a").nullable === true) - checkEvaluation('c.map(typeMap).at(3).getItem("aa"), "bb", row) - checkEvaluation('c.array(typeArray.elementType).at(4).getItem(1), "bb", row) + checkEvaluation(quickResolve('c.map(typeMap).at(3).getItem("aa")), "bb", row) + checkEvaluation(quickResolve('c.array(typeArray.elementType).at(4).getItem(1)), "bb", row) checkEvaluation(quickResolve('c.struct(typeS).at(2).getField("a")), "aa", row) } + test("error message of ExtractValue") { + val structType = StructType(StructField("a", StringType, true) :: Nil) + val arrayStructType = ArrayType(structType) + val arrayType = ArrayType(StringType) + val otherType = StringType + + def checkErrorMessage( + childDataType: DataType, + fieldDataType: DataType, + errorMesage: String): Unit = { + val e = intercept[org.apache.spark.sql.AnalysisException] { + ExtractValue( + Literal.create(null, childDataType), + Literal.create(null, fieldDataType), + _ == _) + } + assert(e.getMessage().contains(errorMesage)) + } + + checkErrorMessage(structType, IntegerType, "Field name should be String Literal") + checkErrorMessage(arrayStructType, BooleanType, "Field name should be String Literal") + checkErrorMessage(arrayType, StringType, "Array index should be integral type") + checkErrorMessage(otherType, StringType, "Can't extract value from") + } + test("arithmetic") { val row = create_row(1, 2, 3, null) val c1 = 'a.int.at(0) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 18f92150b0966..6b7d9a85c341b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.analysis.{UnresolvedGetField, EliminateSubQueries} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedExtractValue, EliminateSubQueries} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.plans.PlanTest @@ -180,10 
+180,10 @@ class ConstantFoldingSuite extends PlanTest { IsNull(Literal(null)) as 'c1, IsNotNull(Literal(null)) as 'c2, - GetItem(Literal.create(null, ArrayType(IntegerType)), 1) as 'c3, - GetItem( + UnresolvedExtractValue(Literal.create(null, ArrayType(IntegerType)), 1) as 'c3, + UnresolvedExtractValue( Literal.create(Seq(1), ArrayType(IntegerType)), Literal.create(null, IntegerType)) as 'c4, - UnresolvedGetField( + UnresolvedExtractValue( Literal.create(null, StructType(Seq(StructField("a", IntegerType, true)))), "a") as 'c5, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 8bbe11b412214..e6e475bb82f82 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -23,7 +23,7 @@ import org.apache.spark.annotation.Experimental import org.apache.spark.Logging import org.apache.spark.sql.functions.lit import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedStar, UnresolvedGetField} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedStar, UnresolvedExtractValue} import org.apache.spark.sql.types._ @@ -67,6 +67,19 @@ class Column(protected[sql] val expr: Expression) extends Logging { override def hashCode: Int = this.expr.hashCode + /** + * Extracts a value or values from a complex type. + * The following types of extraction are supported: + * - Given an Array, an integer ordinal can be used to retrieve a single value. + * - Given a Map, a key of the correct type can be used to retrieve an individual value. + * - Given a Struct, a string fieldName can be used to extract that field. + * - Given an Array of Structs, a string fieldName can be used to extract filed + * of every struct in that array, and return an Array of fields + * + * @group expr_ops + */ + def apply(field: Any): Column = UnresolvedExtractValue(expr, Literal(field)) + /** * Unary minus, i.e. negate the expression. * {{{ @@ -529,14 +542,14 @@ class Column(protected[sql] val expr: Expression) extends Logging { * * @group expr_ops */ - def getItem(key: Any): Column = GetItem(expr, Literal(key)) + def getItem(key: Any): Column = UnresolvedExtractValue(expr, Literal(key)) /** * An expression that gets a field by name in a [[StructType]]. * * @group expr_ops */ - def getField(fieldName: String): Column = UnresolvedGetField(expr, fieldName) + def getField(fieldName: String): Column = UnresolvedExtractValue(expr, Literal(fieldName)) /** * An expression that returns a substring. 
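Taken together, the Column changes above route array, map, and struct extraction through a single `UnresolvedExtractValue` expression, with the new `apply` as the entry point and `getItem`/`getField` kept as thin aliases over it. As a rough usage sketch (illustrative, not part of the patch), assume a DataFrame `df` with an array column `a`, a map column `m`, and a struct column `s`, mirroring the `complexData` fixture exercised in the test diff below:

  // Array: an integer ordinal selects a single element
  df.filter(df("a")(0) === 2)
  // Map: a key of the map's key type selects the corresponding value
  df.filter(df("m")("1") === 1)
  // Struct: a string field name extracts that field
  df.filter(df("s")("key") === 1)
  // The existing accessors still compile and now resolve through the same path
  df("a").getItem(0)
  df("s").getField("key")
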
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 1515e9b843771..d2ca8dccae574 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -449,7 +449,7 @@ class DataFrameSuite extends QueryTest { testData.collect().map { case Row(key: Int, value: String) => Row(key, value, key + 1) }.toSeq) - assert(df.schema.map(_.name).toSeq === Seq("key", "value", "newCol")) + assert(df.schema.map(_.name) === Seq("key", "value", "newCol")) } test("replace column using withColumn") { @@ -484,7 +484,7 @@ class DataFrameSuite extends QueryTest { testData.collect().map { case Row(key: Int, value: String) => Row(key, value, key + 1) }.toSeq) - assert(df.schema.map(_.name).toSeq === Seq("key", "valueRenamed", "newCol")) + assert(df.schema.map(_.name) === Seq("key", "valueRenamed", "newCol")) } test("randomSplit") { @@ -593,4 +593,10 @@ class DataFrameSuite extends QueryTest { Row(new java.math.BigDecimal(2.0))) TestSQLContext.setConf(SQLConf.CODEGEN_ENABLED, originalValue.toString) } + + test("SPARK-7133: Implement struct, array, and map field accessor") { + assert(complexData.filter(complexData("a")(0) === 2).count() == 1) + assert(complexData.filter(complexData("m")("1") === 1).count() == 1) + assert(complexData.filter(complexData("s")("key") === 1).count() == 1) + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index f30b196734c40..04d40bbb2bced 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -1204,7 +1204,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C nodeToExpr(qualifier) match { case UnresolvedAttribute(qualifierName) => UnresolvedAttribute(qualifierName :+ cleanIdentifier(attr)) - case other => UnresolvedGetField(other, attr) + case other => UnresolvedExtractValue(other, Literal(attr)) } /* Stars (*) */ @@ -1329,7 +1329,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C /* Complex datatype manipulation */ case Token("[", child :: ordinal :: Nil) => - GetItem(nodeToExpr(child), nodeToExpr(ordinal)) + UnresolvedExtractValue(nodeToExpr(child), nodeToExpr(ordinal)) /* Other functions */ case Token("TOK_FUNCTION", Token(ARRAY(), Nil) :: children) => From 4b3bb0e43ca7e1a27308516608419487b6a844e6 Mon Sep 17 00:00:00 2001 From: Kay Ousterhout <kayousterhout@gmail.com> Date: Fri, 8 May 2015 12:24:06 -0700 Subject: [PATCH 041/320] [SPARK-6627] Finished rename to ShuffleBlockResolver The previous cleanup-commit for SPARK-6627 renamed ShuffleBlockManager to ShuffleBlockResolver, but didn't rename the associated subclasses and variables; this commit does that. I'm unsure whether it's ok to rename ExternalShuffleBlockManager, since that's technically a public class? 
cc pwendell Author: Kay Ousterhout <kayousterhout@gmail.com> Closes #5764 from kayousterhout/SPARK-6627 and squashes the following commits: 43add1e [Kay Ousterhout] Spacing fix 96080bf [Kay Ousterhout] Test fixes d8a5d36 [Kay Ousterhout] [SPARK-6627] Finished rename to ShuffleBlockResolver --- ...r.scala => FileShuffleBlockResolver.scala} | 11 ++---- ....scala => IndexShuffleBlockResolver.scala} | 10 ++--- .../shuffle/hash/HashShuffleManager.scala | 6 +-- .../shuffle/hash/HashShuffleWriter.scala | 4 +- .../shuffle/sort/SortShuffleManager.scala | 6 +-- .../shuffle/sort/SortShuffleWriter.scala | 12 +++--- .../org/apache/spark/storage/BlockId.scala | 2 +- .../apache/spark/storage/BlockManager.scala | 5 ++- .../spark/storage/DiskBlockManager.scala | 2 +- .../hash/HashShuffleManagerSuite.scala | 18 ++++----- .../shuffle/ExternalShuffleBlockHandler.java | 6 +-- ...java => ExternalShuffleBlockResolver.java} | 16 ++++---- .../ExternalShuffleBlockHandlerSuite.java | 16 ++++---- ...=> ExternalShuffleBlockResolverSuite.java} | 30 +++++++-------- .../shuffle/ExternalShuffleCleanupSuite.java | 37 ++++++++++--------- .../shuffle/TestShuffleDataContext.java | 8 ++-- 16 files changed, 94 insertions(+), 95 deletions(-) rename core/src/main/scala/org/apache/spark/shuffle/{FileShuffleBlockManager.scala => FileShuffleBlockResolver.scala} (97%) rename core/src/main/scala/org/apache/spark/shuffle/{IndexShuffleBlockManager.scala => IndexShuffleBlockResolver.scala} (93%) rename network/shuffle/src/main/java/org/apache/spark/network/shuffle/{ExternalShuffleBlockManager.java => ExternalShuffleBlockResolver.java} (95%) rename network/shuffle/src/test/java/org/apache/spark/network/shuffle/{ExternalShuffleBlockManagerSuite.java => ExternalShuffleBlockResolverSuite.java} (77%) diff --git a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala similarity index 97% rename from core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala rename to core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala index e9b4e2b955dc8..6ad427bcac7f9 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala @@ -18,7 +18,6 @@ package org.apache.spark.shuffle import java.io.File -import java.nio.ByteBuffer import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicInteger @@ -29,7 +28,7 @@ import org.apache.spark.executor.ShuffleWriteMetrics import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.serializer.Serializer -import org.apache.spark.shuffle.FileShuffleBlockManager.ShuffleFileGroup +import org.apache.spark.shuffle.FileShuffleBlockResolver.ShuffleFileGroup import org.apache.spark.storage._ import org.apache.spark.util.{MetadataCleaner, MetadataCleanerType, TimeStampedHashMap} import org.apache.spark.util.collection.{PrimitiveKeyOpenHashMap, PrimitiveVector} @@ -64,9 +63,8 @@ private[spark] trait ShuffleWriterGroup { * files within a ShuffleFileGroups associated with the block's reducer. */ // Note: Changes to the format in this file should be kept in sync with -// org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getHashBasedShuffleBlockData(). 
-private[spark] -class FileShuffleBlockManager(conf: SparkConf) +// org.apache.spark.network.shuffle.ExternalShuffleBlockResolver#getHashBasedShuffleBlockData(). +private[spark] class FileShuffleBlockResolver(conf: SparkConf) extends ShuffleBlockResolver with Logging { private val transportConf = SparkTransportConf.fromSparkConf(conf) @@ -242,8 +240,7 @@ class FileShuffleBlockManager(conf: SparkConf) } } -private[spark] -object FileShuffleBlockManager { +private[spark] object FileShuffleBlockResolver { /** * A group of shuffle files, one per reducer. * A particular mapper will be assigned a single ShuffleFileGroup to write its output to. diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala similarity index 93% rename from core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala rename to core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index a1741e2875c16..d9c63b6e7bbb9 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -18,7 +18,6 @@ package org.apache.spark.shuffle import java.io._ -import java.nio.ByteBuffer import com.google.common.io.ByteStreams @@ -28,7 +27,7 @@ import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.storage._ import org.apache.spark.util.Utils -import IndexShuffleBlockManager.NOOP_REDUCE_ID +import IndexShuffleBlockResolver.NOOP_REDUCE_ID /** * Create and maintain the shuffle blocks' mapping between logic block and physical file location. @@ -40,9 +39,8 @@ import IndexShuffleBlockManager.NOOP_REDUCE_ID * */ // Note: Changes to the format in this file should be kept in sync with -// org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getSortBasedShuffleBlockData(). -private[spark] -class IndexShuffleBlockManager(conf: SparkConf) extends ShuffleBlockResolver { +// org.apache.spark.network.shuffle.ExternalShuffleBlockResolver#getSortBasedShuffleBlockData(). +private[spark] class IndexShuffleBlockResolver(conf: SparkConf) extends ShuffleBlockResolver { private lazy val blockManager = SparkEnv.get.blockManager @@ -115,7 +113,7 @@ class IndexShuffleBlockManager(conf: SparkConf) extends ShuffleBlockResolver { override def stop(): Unit = {} } -private[spark] object IndexShuffleBlockManager { +private[spark] object IndexShuffleBlockResolver { // No-op reduce ID used in interactions with disk store and BlockObjectWriter. // The disk store currently expects puts to relate to a (map, reduce) pair, but in the sort // shuffle outputs for several reduces are glommed into a single file. diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala index 2a7df8dd5bd83..c089088f409dd 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala @@ -26,7 +26,7 @@ import org.apache.spark.shuffle._ */ private[spark] class HashShuffleManager(conf: SparkConf) extends ShuffleManager { - private val fileShuffleBlockManager = new FileShuffleBlockManager(conf) + private val fileShuffleBlockResolver = new FileShuffleBlockResolver(conf) /* Register a shuffle with the manager and obtain a handle for it to pass to tasks. 
*/ override def registerShuffle[K, V, C]( @@ -61,8 +61,8 @@ private[spark] class HashShuffleManager(conf: SparkConf) extends ShuffleManager shuffleBlockResolver.removeShuffle(shuffleId) } - override def shuffleBlockResolver: FileShuffleBlockManager = { - fileShuffleBlockManager + override def shuffleBlockResolver: FileShuffleBlockResolver = { + fileShuffleBlockResolver } /** Shut down this ShuffleManager. */ diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala index cd27c9e07a3cd..897f0a5dc5bcc 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala @@ -25,7 +25,7 @@ import org.apache.spark.shuffle._ import org.apache.spark.storage.BlockObjectWriter private[spark] class HashShuffleWriter[K, V]( - shuffleBlockManager: FileShuffleBlockManager, + shuffleBlockResolver: FileShuffleBlockResolver, handle: BaseShuffleHandle[K, V, _], mapId: Int, context: TaskContext) @@ -45,7 +45,7 @@ private[spark] class HashShuffleWriter[K, V]( private val blockManager = SparkEnv.get.blockManager private val ser = Serializer.getSerializer(dep.serializer.getOrElse(null)) - private val shuffle = shuffleBlockManager.forMapTask(dep.shuffleId, mapId, numOutputSplits, ser, + private val shuffle = shuffleBlockResolver.forMapTask(dep.shuffleId, mapId, numOutputSplits, ser, writeMetrics) /** Write a bunch of records to this task's output */ diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index 0497036192154..15842941daaab 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -25,7 +25,7 @@ import org.apache.spark.shuffle.hash.HashShuffleReader private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager { - private val indexShuffleBlockManager = new IndexShuffleBlockManager(conf) + private val indexShuffleBlockResolver = new IndexShuffleBlockResolver(conf) private val shuffleMapNumber = new ConcurrentHashMap[Int, Int]() /** @@ -72,8 +72,8 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager true } - override def shuffleBlockResolver: IndexShuffleBlockManager = { - indexShuffleBlockManager + override def shuffleBlockResolver: IndexShuffleBlockResolver = { + indexShuffleBlockResolver } /** Shut down this ShuffleManager. 
*/ diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala index a066435df6fb0..add2656294ca2 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala @@ -20,12 +20,12 @@ package org.apache.spark.shuffle.sort import org.apache.spark.{MapOutputTracker, SparkEnv, Logging, TaskContext} import org.apache.spark.executor.ShuffleWriteMetrics import org.apache.spark.scheduler.MapStatus -import org.apache.spark.shuffle.{IndexShuffleBlockManager, ShuffleWriter, BaseShuffleHandle} +import org.apache.spark.shuffle.{IndexShuffleBlockResolver, ShuffleWriter, BaseShuffleHandle} import org.apache.spark.storage.ShuffleBlockId import org.apache.spark.util.collection.ExternalSorter private[spark] class SortShuffleWriter[K, V, C]( - shuffleBlockManager: IndexShuffleBlockManager, + shuffleBlockResolver: IndexShuffleBlockResolver, handle: BaseShuffleHandle[K, V, C], mapId: Int, context: TaskContext) @@ -65,10 +65,10 @@ private[spark] class SortShuffleWriter[K, V, C]( // Don't bother including the time to open the merged output file in the shuffle write time, // because it just opens a single file, so is typically too fast to measure accurately // (see SPARK-3570). - val outputFile = shuffleBlockManager.getDataFile(dep.shuffleId, mapId) - val blockId = ShuffleBlockId(dep.shuffleId, mapId, IndexShuffleBlockManager.NOOP_REDUCE_ID) + val outputFile = shuffleBlockResolver.getDataFile(dep.shuffleId, mapId) + val blockId = ShuffleBlockId(dep.shuffleId, mapId, IndexShuffleBlockResolver.NOOP_REDUCE_ID) val partitionLengths = sorter.writePartitionedFile(blockId, context, outputFile) - shuffleBlockManager.writeIndexFile(dep.shuffleId, mapId, partitionLengths) + shuffleBlockResolver.writeIndexFile(dep.shuffleId, mapId, partitionLengths) mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths) } @@ -84,7 +84,7 @@ private[spark] class SortShuffleWriter[K, V, C]( return Option(mapStatus) } else { // The map task failed, so delete our output data. - shuffleBlockManager.removeDataByMap(dep.shuffleId, mapId) + shuffleBlockResolver.removeDataByMap(dep.shuffleId, mapId) return None } } finally { diff --git a/core/src/main/scala/org/apache/spark/storage/BlockId.scala b/core/src/main/scala/org/apache/spark/storage/BlockId.scala index c186fd360fef6..524f6970992a5 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockId.scala @@ -54,7 +54,7 @@ case class RDDBlockId(rddId: Int, splitIndex: Int) extends BlockId { } // Format of the shuffle block ids (including data and index) should be kept in sync with -// org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getBlockData(). +// org.apache.spark.network.shuffle.ExternalShuffleBlockResolver#getBlockData(). 
@DeveloperApi case class ShuffleBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId { override def name: String = "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index a46fecd2274ef..cc794e5c90ffa 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -431,10 +431,11 @@ private[spark] class BlockManager( // As an optimization for map output fetches, if the block is for a shuffle, return it // without acquiring a lock; the disk store never deletes (recent) items so this should work if (blockId.isShuffle) { - val shuffleBlockManager = shuffleManager.shuffleBlockResolver + val shuffleBlockResolver = shuffleManager.shuffleBlockResolver // TODO: This should gracefully handle case where local block is not available. Currently // downstream code will throw an exception. - Option(shuffleBlockManager.getBlockData(blockId.asInstanceOf[ShuffleBlockId]).nioByteBuffer()) + Option( + shuffleBlockResolver.getBlockData(blockId.asInstanceOf[ShuffleBlockId]).nioByteBuffer()) } else { doGetLocal(blockId, asBlockResult = false).asInstanceOf[Option[ByteBuffer]] } diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index 5764c16902c66..2a4447705fa65 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -55,7 +55,7 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon /** Looks up a file by hashing it into one of our local subdirectories. */ // This method should be kept in sync with - // org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getFile(). + // org.apache.spark.network.shuffle.ExternalShuffleBlockResolver#getFile(). 
def getFile(filename: String): File = { // Figure out which local directory it hashes to, and which subdirectory in that val hash = Utils.nonNegativeHash(filename) diff --git a/core/src/test/scala/org/apache/spark/shuffle/hash/HashShuffleManagerSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/hash/HashShuffleManagerSuite.scala index 84384bb48999a..0537bf66ad020 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/hash/HashShuffleManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/hash/HashShuffleManagerSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.{SparkEnv, SparkContext, LocalSparkContext, SparkConf} import org.apache.spark.executor.ShuffleWriteMetrics import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.serializer.JavaSerializer -import org.apache.spark.shuffle.FileShuffleBlockManager +import org.apache.spark.shuffle.FileShuffleBlockResolver import org.apache.spark.storage.{ShuffleBlockId, FileSegment} class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { @@ -53,10 +53,10 @@ class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { sc = new SparkContext("local", "test", conf) - val shuffleBlockManager = - SparkEnv.get.shuffleManager.shuffleBlockResolver.asInstanceOf[FileShuffleBlockManager] + val shuffleBlockResolver = + SparkEnv.get.shuffleManager.shuffleBlockResolver.asInstanceOf[FileShuffleBlockResolver] - val shuffle1 = shuffleBlockManager.forMapTask(1, 1, 1, new JavaSerializer(conf), + val shuffle1 = shuffleBlockResolver.forMapTask(1, 1, 1, new JavaSerializer(conf), new ShuffleWriteMetrics) for (writer <- shuffle1.writers) { writer.write("test1", "value") @@ -69,7 +69,7 @@ class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { val shuffle1Segment = shuffle1.writers(0).fileSegment() shuffle1.releaseWriters(success = true) - val shuffle2 = shuffleBlockManager.forMapTask(1, 2, 1, new JavaSerializer(conf), + val shuffle2 = shuffleBlockResolver.forMapTask(1, 2, 1, new JavaSerializer(conf), new ShuffleWriteMetrics) for (writer <- shuffle2.writers) { @@ -88,7 +88,7 @@ class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { // of block based on remaining data in file : which could mess things up when there is // concurrent read and writes happening to the same shuffle group. - val shuffle3 = shuffleBlockManager.forMapTask(1, 3, 1, new JavaSerializer(testConf), + val shuffle3 = shuffleBlockResolver.forMapTask(1, 3, 1, new JavaSerializer(testConf), new ShuffleWriteMetrics) for (writer <- shuffle3.writers) { writer.write("test3", "value") @@ -98,10 +98,10 @@ class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { writer.commitAndClose() } // check before we register. 
- checkSegments(shuffle2Segment, shuffleBlockManager.getBlockData(ShuffleBlockId(1, 2, 0))) + checkSegments(shuffle2Segment, shuffleBlockResolver.getBlockData(ShuffleBlockId(1, 2, 0))) shuffle3.releaseWriters(success = true) - checkSegments(shuffle2Segment, shuffleBlockManager.getBlockData(ShuffleBlockId(1, 2, 0))) - shuffleBlockManager.removeShuffle(1) + checkSegments(shuffle2Segment, shuffleBlockResolver.getBlockData(ShuffleBlockId(1, 2, 0))) + shuffleBlockResolver.removeShuffle(1) } def writeToFile(file: File, numBytes: Int) { diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java index 46ca9708621b9..e4faaf8854fc7 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java @@ -46,18 +46,18 @@ public class ExternalShuffleBlockHandler extends RpcHandler { private final Logger logger = LoggerFactory.getLogger(ExternalShuffleBlockHandler.class); - private final ExternalShuffleBlockManager blockManager; + private final ExternalShuffleBlockResolver blockManager; private final OneForOneStreamManager streamManager; public ExternalShuffleBlockHandler(TransportConf conf) { - this(new OneForOneStreamManager(), new ExternalShuffleBlockManager(conf)); + this(new OneForOneStreamManager(), new ExternalShuffleBlockResolver(conf)); } /** Enables mocking out the StreamManager and BlockManager. */ @VisibleForTesting ExternalShuffleBlockHandler( OneForOneStreamManager streamManager, - ExternalShuffleBlockManager blockManager) { + ExternalShuffleBlockResolver blockManager) { this.streamManager = streamManager; this.blockManager = blockManager; } diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManager.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java similarity index 95% rename from network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManager.java rename to network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java index 93e6fdd7161fa..dd08e24cade23 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManager.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java @@ -44,13 +44,13 @@ * Manages converting shuffle BlockIds into physical segments of local files, from a process outside * of Executors. Each Executor must register its own configuration about where it stores its files * (local dirs) and how (shuffle manager). The logic for retrieval of individual files is replicated - * from Spark's FileShuffleBlockManager and IndexShuffleBlockManager. + * from Spark's FileShuffleBlockResolver and IndexShuffleBlockResolver. * * Executors with shuffle file consolidation are not currently supported, as the index is stored in - * the Executor's memory, unlike the IndexShuffleBlockManager. + * the Executor's memory, unlike the IndexShuffleBlockResolver. 
*/ -public class ExternalShuffleBlockManager { - private static final Logger logger = LoggerFactory.getLogger(ExternalShuffleBlockManager.class); +public class ExternalShuffleBlockResolver { + private static final Logger logger = LoggerFactory.getLogger(ExternalShuffleBlockResolver.class); // Map containing all registered executors' metadata. private final ConcurrentMap<AppExecId, ExecutorShuffleInfo> executors; @@ -60,7 +60,7 @@ public class ExternalShuffleBlockManager { private final TransportConf conf; - public ExternalShuffleBlockManager(TransportConf conf) { + public ExternalShuffleBlockResolver(TransportConf conf) { this(conf, Executors.newSingleThreadExecutor( // Add `spark` prefix because it will run in NM in Yarn mode. NettyUtils.createThreadFactory("spark-shuffle-directory-cleaner"))); @@ -68,7 +68,7 @@ public ExternalShuffleBlockManager(TransportConf conf) { // Allows tests to have more control over when directories are cleaned up. @VisibleForTesting - ExternalShuffleBlockManager(TransportConf conf, Executor directoryCleaner) { + ExternalShuffleBlockResolver(TransportConf conf, Executor directoryCleaner) { this.conf = conf; this.executors = Maps.newConcurrentMap(); this.directoryCleaner = directoryCleaner; @@ -168,7 +168,7 @@ private void deleteExecutorDirs(String[] dirs) { /** * Hash-based shuffle data is simply stored as one file per block. - * This logic is from FileShuffleBlockManager. + * This logic is from FileShuffleBlockResolver. */ // TODO: Support consolidated hash shuffle files private ManagedBuffer getHashBasedShuffleBlockData(ExecutorShuffleInfo executor, String blockId) { @@ -178,7 +178,7 @@ private ManagedBuffer getHashBasedShuffleBlockData(ExecutorShuffleInfo executor, /** * Sort-based shuffle data uses an index called "shuffle_ShuffleId_MapId_0.index" into a data file - * called "shuffle_ShuffleId_MapId_0.data". This logic is from IndexShuffleBlockManager, + * called "shuffle_ShuffleId_MapId_0.data". This logic is from IndexShuffleBlockResolver, * and the block id format is from ShuffleDataBlockId and ShuffleIndexBlockId. 
*/ private ManagedBuffer getSortBasedShuffleBlockData( diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java index 3f9fe1681cf27..73374cdc77a23 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java @@ -45,14 +45,14 @@ public class ExternalShuffleBlockHandlerSuite { TransportClient client = mock(TransportClient.class); OneForOneStreamManager streamManager; - ExternalShuffleBlockManager blockManager; + ExternalShuffleBlockResolver blockResolver; RpcHandler handler; @Before public void beforeEach() { streamManager = mock(OneForOneStreamManager.class); - blockManager = mock(ExternalShuffleBlockManager.class); - handler = new ExternalShuffleBlockHandler(streamManager, blockManager); + blockResolver = mock(ExternalShuffleBlockResolver.class); + handler = new ExternalShuffleBlockHandler(streamManager, blockResolver); } @Test @@ -62,7 +62,7 @@ public void testRegisterExecutor() { ExecutorShuffleInfo config = new ExecutorShuffleInfo(new String[] {"/a", "/b"}, 16, "sort"); byte[] registerMessage = new RegisterExecutor("app0", "exec1", config).toByteArray(); handler.receive(client, registerMessage, callback); - verify(blockManager, times(1)).registerExecutor("app0", "exec1", config); + verify(blockResolver, times(1)).registerExecutor("app0", "exec1", config); verify(callback, times(1)).onSuccess((byte[]) any()); verify(callback, never()).onFailure((Throwable) any()); @@ -75,12 +75,12 @@ public void testOpenShuffleBlocks() { ManagedBuffer block0Marker = new NioManagedBuffer(ByteBuffer.wrap(new byte[3])); ManagedBuffer block1Marker = new NioManagedBuffer(ByteBuffer.wrap(new byte[7])); - when(blockManager.getBlockData("app0", "exec1", "b0")).thenReturn(block0Marker); - when(blockManager.getBlockData("app0", "exec1", "b1")).thenReturn(block1Marker); + when(blockResolver.getBlockData("app0", "exec1", "b0")).thenReturn(block0Marker); + when(blockResolver.getBlockData("app0", "exec1", "b1")).thenReturn(block1Marker); byte[] openBlocks = new OpenBlocks("app0", "exec1", new String[] { "b0", "b1" }).toByteArray(); handler.receive(client, openBlocks, callback); - verify(blockManager, times(1)).getBlockData("app0", "exec1", "b0"); - verify(blockManager, times(1)).getBlockData("app0", "exec1", "b1"); + verify(blockResolver, times(1)).getBlockData("app0", "exec1", "b0"); + verify(blockResolver, times(1)).getBlockData("app0", "exec1", "b1"); ArgumentCaptor<byte[]> response = ArgumentCaptor.forClass(byte[].class); verify(callback, times(1)).onSuccess(response.capture()); diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManagerSuite.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java similarity index 77% rename from network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManagerSuite.java rename to network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java index dad6428a836fc..d02f4f0fdb682 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManagerSuite.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java @@ -30,7 +30,7 
@@ import static org.junit.Assert.*; -public class ExternalShuffleBlockManagerSuite { +public class ExternalShuffleBlockResolverSuite { static String sortBlock0 = "Hello!"; static String sortBlock1 = "World!"; @@ -60,29 +60,29 @@ public static void afterAll() { @Test public void testBadRequests() { - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf); + ExternalShuffleBlockResolver resolver = new ExternalShuffleBlockResolver(conf); // Unregistered executor try { - manager.getBlockData("app0", "exec1", "shuffle_1_1_0"); + resolver.getBlockData("app0", "exec1", "shuffle_1_1_0"); fail("Should have failed"); } catch (RuntimeException e) { assertTrue("Bad error message: " + e, e.getMessage().contains("not registered")); } // Invalid shuffle manager - manager.registerExecutor("app0", "exec2", dataContext.createExecutorInfo("foobar")); + resolver.registerExecutor("app0", "exec2", dataContext.createExecutorInfo("foobar")); try { - manager.getBlockData("app0", "exec2", "shuffle_1_1_0"); + resolver.getBlockData("app0", "exec2", "shuffle_1_1_0"); fail("Should have failed"); } catch (UnsupportedOperationException e) { // pass } // Nonexistent shuffle block - manager.registerExecutor("app0", "exec3", + resolver.registerExecutor("app0", "exec3", dataContext.createExecutorInfo("org.apache.spark.shuffle.sort.SortShuffleManager")); try { - manager.getBlockData("app0", "exec3", "shuffle_1_1_0"); + resolver.getBlockData("app0", "exec3", "shuffle_1_1_0"); fail("Should have failed"); } catch (Exception e) { // pass @@ -91,18 +91,18 @@ public void testBadRequests() { @Test public void testSortShuffleBlocks() throws IOException { - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf); - manager.registerExecutor("app0", "exec0", + ExternalShuffleBlockResolver resolver = new ExternalShuffleBlockResolver(conf); + resolver.registerExecutor("app0", "exec0", dataContext.createExecutorInfo("org.apache.spark.shuffle.sort.SortShuffleManager")); InputStream block0Stream = - manager.getBlockData("app0", "exec0", "shuffle_0_0_0").createInputStream(); + resolver.getBlockData("app0", "exec0", "shuffle_0_0_0").createInputStream(); String block0 = CharStreams.toString(new InputStreamReader(block0Stream)); block0Stream.close(); assertEquals(sortBlock0, block0); InputStream block1Stream = - manager.getBlockData("app0", "exec0", "shuffle_0_0_1").createInputStream(); + resolver.getBlockData("app0", "exec0", "shuffle_0_0_1").createInputStream(); String block1 = CharStreams.toString(new InputStreamReader(block1Stream)); block1Stream.close(); assertEquals(sortBlock1, block1); @@ -110,18 +110,18 @@ public void testSortShuffleBlocks() throws IOException { @Test public void testHashShuffleBlocks() throws IOException { - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf); - manager.registerExecutor("app0", "exec0", + ExternalShuffleBlockResolver resolver = new ExternalShuffleBlockResolver(conf); + resolver.registerExecutor("app0", "exec0", dataContext.createExecutorInfo("org.apache.spark.shuffle.hash.HashShuffleManager")); InputStream block0Stream = - manager.getBlockData("app0", "exec0", "shuffle_1_0_0").createInputStream(); + resolver.getBlockData("app0", "exec0", "shuffle_1_0_0").createInputStream(); String block0 = CharStreams.toString(new InputStreamReader(block0Stream)); block0Stream.close(); assertEquals(hashBlock0, block0); InputStream block1Stream = - manager.getBlockData("app0", "exec0", "shuffle_1_0_1").createInputStream(); + resolver.getBlockData("app0", 
"exec0", "shuffle_1_0_1").createInputStream(); String block1 = CharStreams.toString(new InputStreamReader(block1Stream)); block1Stream.close(); assertEquals(hashBlock1, block1); diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java index 254e3a7a32b98..d9d9c1bf2f17a 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java @@ -41,14 +41,15 @@ public class ExternalShuffleCleanupSuite { public void noCleanupAndCleanup() throws IOException { TestShuffleDataContext dataContext = createSomeData(); - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf, sameThreadExecutor); - manager.registerExecutor("app", "exec0", dataContext.createExecutorInfo("shuffleMgr")); - manager.applicationRemoved("app", false /* cleanup */); + ExternalShuffleBlockResolver resolver = + new ExternalShuffleBlockResolver(conf, sameThreadExecutor); + resolver.registerExecutor("app", "exec0", dataContext.createExecutorInfo("shuffleMgr")); + resolver.applicationRemoved("app", false /* cleanup */); assertStillThere(dataContext); - manager.registerExecutor("app", "exec1", dataContext.createExecutorInfo("shuffleMgr")); - manager.applicationRemoved("app", true /* cleanup */); + resolver.registerExecutor("app", "exec1", dataContext.createExecutorInfo("shuffleMgr")); + resolver.applicationRemoved("app", true /* cleanup */); assertCleanedUp(dataContext); } @@ -64,7 +65,7 @@ public void cleanupUsesExecutor() throws IOException { @Override public void execute(Runnable runnable) { cleanupCalled.set(true); } }; - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf, noThreadExecutor); + ExternalShuffleBlockResolver manager = new ExternalShuffleBlockResolver(conf, noThreadExecutor); manager.registerExecutor("app", "exec0", dataContext.createExecutorInfo("shuffleMgr")); manager.applicationRemoved("app", true); @@ -81,11 +82,12 @@ public void cleanupMultipleExecutors() throws IOException { TestShuffleDataContext dataContext0 = createSomeData(); TestShuffleDataContext dataContext1 = createSomeData(); - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf, sameThreadExecutor); + ExternalShuffleBlockResolver resolver = + new ExternalShuffleBlockResolver(conf, sameThreadExecutor); - manager.registerExecutor("app", "exec0", dataContext0.createExecutorInfo("shuffleMgr")); - manager.registerExecutor("app", "exec1", dataContext1.createExecutorInfo("shuffleMgr")); - manager.applicationRemoved("app", true); + resolver.registerExecutor("app", "exec0", dataContext0.createExecutorInfo("shuffleMgr")); + resolver.registerExecutor("app", "exec1", dataContext1.createExecutorInfo("shuffleMgr")); + resolver.applicationRemoved("app", true); assertCleanedUp(dataContext0); assertCleanedUp(dataContext1); @@ -96,25 +98,26 @@ public void cleanupOnlyRemovedApp() throws IOException { TestShuffleDataContext dataContext0 = createSomeData(); TestShuffleDataContext dataContext1 = createSomeData(); - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf, sameThreadExecutor); + ExternalShuffleBlockResolver resolver = + new ExternalShuffleBlockResolver(conf, sameThreadExecutor); - manager.registerExecutor("app-0", "exec0", dataContext0.createExecutorInfo("shuffleMgr")); - 
manager.registerExecutor("app-1", "exec0", dataContext1.createExecutorInfo("shuffleMgr")); + resolver.registerExecutor("app-0", "exec0", dataContext0.createExecutorInfo("shuffleMgr")); + resolver.registerExecutor("app-1", "exec0", dataContext1.createExecutorInfo("shuffleMgr")); - manager.applicationRemoved("app-nonexistent", true); + resolver.applicationRemoved("app-nonexistent", true); assertStillThere(dataContext0); assertStillThere(dataContext1); - manager.applicationRemoved("app-0", true); + resolver.applicationRemoved("app-0", true); assertCleanedUp(dataContext0); assertStillThere(dataContext1); - manager.applicationRemoved("app-1", true); + resolver.applicationRemoved("app-1", true); assertCleanedUp(dataContext0); assertCleanedUp(dataContext1); // Make sure it's not an error to cleanup multiple times - manager.applicationRemoved("app-1", true); + resolver.applicationRemoved("app-1", true); assertCleanedUp(dataContext0); assertCleanedUp(dataContext1); } diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java index 76639114df5d9..3fdde054ab6c7 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java @@ -29,7 +29,7 @@ /** * Manages some sort- and hash-based shuffle data, including the creation - * and cleanup of directories that can be read by the {@link ExternalShuffleBlockManager}. + * and cleanup of directories that can be read by the {@link ExternalShuffleBlockResolver}. */ public class TestShuffleDataContext { public final String[] localDirs; @@ -61,9 +61,9 @@ public void insertSortShuffleData(int shuffleId, int mapId, byte[][] blocks) thr String blockId = "shuffle_" + shuffleId + "_" + mapId + "_0"; OutputStream dataStream = new FileOutputStream( - ExternalShuffleBlockManager.getFile(localDirs, subDirsPerLocalDir, blockId + ".data")); + ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, blockId + ".data")); DataOutputStream indexStream = new DataOutputStream(new FileOutputStream( - ExternalShuffleBlockManager.getFile(localDirs, subDirsPerLocalDir, blockId + ".index"))); + ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, blockId + ".index"))); long offset = 0; indexStream.writeLong(offset); @@ -82,7 +82,7 @@ public void insertHashShuffleData(int shuffleId, int mapId, byte[][] blocks) thr for (int i = 0; i < blocks.length; i ++) { String blockId = "shuffle_" + shuffleId + "_" + mapId + "_" + i; Files.write(blocks[i], - ExternalShuffleBlockManager.getFile(localDirs, subDirsPerLocalDir, blockId)); + ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, blockId)); } } From 25889d8d97094325f10fbf52f3b36412f212eeb2 Mon Sep 17 00:00:00 2001 From: Evan Jones <ejones@twitter.com> Date: Fri, 8 May 2015 22:00:39 +0100 Subject: [PATCH 042/320] [SPARK-7490] [CORE] [Minor] MapOutputTracker.deserializeMapStatuses: close input streams GZIPInputStream allocates native memory that is not freed until close() or when the finalizer runs. It is best to close() these streams explicitly. stephenh made the same change for serializeMapStatuses in commit b0d884f0. This is the same change for deserialize. (I ran the unit test suite! it seems to have passed. 
I did not make a JIRA since this seems "trivial", and the guidelines suggest it is not required for trivial changes) Author: Evan Jones <ejones@twitter.com> Closes #5982 from evanj/master and squashes the following commits: 0d76e85 [Evan Jones] [CORE] MapOutputTracker.deserializeMapStatuses: close input streams --- core/src/main/scala/org/apache/spark/MapOutputTracker.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 16072283edbe9..018422827e1c8 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -367,7 +367,11 @@ private[spark] object MapOutputTracker extends Logging { // Opposite of serializeMapStatuses. def deserializeMapStatuses(bytes: Array[Byte]): Array[MapStatus] = { val objIn = new ObjectInputStream(new GZIPInputStream(new ByteArrayInputStream(bytes))) - objIn.readObject().asInstanceOf[Array[MapStatus]] + Utils.tryWithSafeFinally { + objIn.readObject().asInstanceOf[Array[MapStatus]] + } { + objIn.close() + } } // Convert an array of MapStatuses to locations and sizes for a given reduce ID. If From dc71e47f047604e3a9972fc386a462d03bff72cf Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Fri, 8 May 2015 14:06:02 -0700 Subject: [PATCH 043/320] [MINOR] Ignore python/lib/pyspark.zip Add `python/lib/pyspark.zip` to `.gitignore`. After merging #5580, `python/lib/pyspark.zip` will be generated when building Spark. Author: zsxwing <zsxwing@gmail.com> Closes #6017 from zsxwing/gitignore and squashes the following commits: 39b10c4 [zsxwing] Ignore python/lib/pyspark.zip --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d54d21b802be8..3624d12269612 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,7 @@ scalastyle.txt scalastyle-output.xml R-unit-tests.log R/unit-tests.out +python/lib/pyspark.zip # For Hive metastore_db/ From c45c09b015f6f1111fcf9e3c3109a253bbd1d259 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Date: Fri, 8 May 2015 14:06:37 -0700 Subject: [PATCH 044/320] [WEBUI] Remove debug feature for vis.js `vis.min.js` refers to `vis.map`, which in turn refers to `vis.js`; these files are used only for debugging `vis.js`, and this debug feature is not needed by Spark itself. This issue is really minor, so I have not filed it in JIRA. 
/CC andrewor14 Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Closes #5994 from sarutak/remove-debug-feature-for-vis and squashes the following commits: 8be038f [Kousuke Saruta] Remove vis.map entry from .rat-exclude 7404945 [Kousuke Saruta] Removed debug feature for vis.js --- .rat-excludes | 1 - core/src/main/resources/org/apache/spark/ui/static/vis.map | 1 - core/src/main/resources/org/apache/spark/ui/static/vis.min.js | 1 - 3 files changed, 3 deletions(-) delete mode 100644 core/src/main/resources/org/apache/spark/ui/static/vis.map diff --git a/.rat-excludes b/.rat-excludes index dc08c4a3673d2..c0f81b57fe09d 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -36,7 +36,6 @@ graphlib-dot.min.js sorttable.js vis.min.js vis.min.css -vis.map .*avsc .*txt .*json diff --git a/core/src/main/resources/org/apache/spark/ui/static/vis.map b/core/src/main/resources/org/apache/spark/ui/static/vis.map deleted file mode 100644 index 9be8b618989b7..0000000000000 --- a/core/src/main/resources/org/apache/spark/ui/static/vis.map +++ /dev/null @@ -1 +0,0 @@ -{"version":3,"file":"vis.map","sources":["./dist/vis.js"],"names":["root","factory","exports","module","define","amd","this","modules","__webpack_require__","moduleId","installedModules","id","loaded","call","m","c","p","util","DOMutil","DataSet","DataView","Queue","Graph3d","graph3d","Camera","Filter","Point2d","Point3d","Slider","StepNumber","Timeline","Graph2d","timeline","DateUtil","DataStep","Range","stack","TimeStep","components","items","Item","BackgroundItem","BoxItem","PointItem","RangeItem","Component","CurrentTime","CustomTime","DataAxis","GraphGroup","Group","BackgroundGroup","ItemSet","Legend","LineGraph","TimeAxis","Network","network","Edge","Groups","Images","Node","Popup","dotparser","gephiParser","Graph","Error","moment","hammer","Hammer","isNumber","object","Number","isString","String","isDate","Date","match","ASPDateRegex","exec","isNaN","parse","isDataTable","google","visualization","DataTable","randomUUID","S4","Math","floor","random","toString","extend","a","i","len","arguments","length","other","prop","hasOwnProperty","selectiveExtend","props","Array","isArray","selectiveDeepExtend","b","TypeError","constructor","Object","undefined","deepExtend","selectiveNotDeepExtend","indexOf","equalArray","convert","type","Boolean","valueOf","isMoment","toDate","getType","toISOString","value","getAbsoluteLeft","elem","getBoundingClientRect","left","getAbsoluteTop","top","addClassName","className","classes","split","push","join","removeClassName","index","splice","forEach","callback","toArray","array","updateProperty","key","addEventListener","element","action","listener","useCapture","navigator","userAgent","attachEvent","removeEventListener","detachEvent","preventDefault","event","window","returnValue","getTarget","target","srcElement","nodeType","parentNode","option","asBoolean","defaultValue","asNumber","asString","asSize","asElement","hexToRGB","hex","shorthandRegex","replace","r","g","result","parseInt","RGBToHex","red","green","blue","slice","parseColor","color","isValidRGB","rgb","substr","isValidHex","hsv","hexToHSV","lighterColorHSV","h","s","v","min","darkerColorHSV","darkerColorHex","HSVToHex","lighterColorHex","background","border","highlight","hover","RGBToHSV","minRGB","maxRGB","max","d","hue","saturation","cssUtil","cssText","styles","style","trim","parts","keys","map","addCssText","currentStyles","newStyles","removeCssText","removeStyles","HSVToRGB","f","q","t","isOk","test","selectiveBridgeObject","fields","referenceObject"
,"objectTo","create","bridgeObject","mergeOptions","mergeTarget","options","enabled","binarySearchCustom","orderedItems","searchFunction","field","field2","maxIterations","iteration","low","high","middle","item","searchResult","binarySearchValue","sidePreference","prevValue","nextValue","easeInOutQuad","start","end","duration","change","easingFunctions","linear","easeInQuad","easeOutQuad","easeInCubic","easeOutCubic","easeInOutCubic","easeInQuart","easeOutQuart","easeInOutQuart","easeInQuint","easeOutQuint","easeInOutQuint","prepareElements","JSONcontainer","elementType","redundant","used","cleanupElements","removeChild","getSVGElement","svgContainer","shift","document","createElementNS","appendChild","getDOMElement","DOMContainer","insertBefore","createElement","drawPoint","x","y","group","point","drawPoints","setAttributeNS","size","drawBar","width","height","rect","data","_options","_data","_fieldId","fieldId","_type","_subscribers","add","setOptions","prototype","queue","_queue","destroy","on","subscribers","subscribe","off","filter","unsubscribe","_trigger","params","senderId","concat","subscriber","addedIds","me","_addItem","columns","_getColumnNames","row","rows","getNumberOfRows","col","cols","getValue","update","updatedIds","updatedData","addOrUpdate","_updateItem","get","ids","firstType","returnType","allowedValues","itemId","_getItem","order","_sort","_filterFields","_appendRow","getIds","getDataSet","mappedItems","filteredItem","name","sort","av","bv","remove","removedId","removedIds","_remove","clear","maxField","itemField","minField","distinct","values","fieldType","count","exists","types","raw","converted","JSON","stringify","dataTable","getNumberOfColumns","getColumnId","getColumnLabel","addRow","setValue","_ids","_onEvent","apply","setData","viewOptions","getArguments","defaultFilter","dataSet","added","updated","removed","delay","Infinity","_timeout","_extended","_flushIfNeeded","flush","methods","original","method","args","fn","context","entry","clearTimeout","setTimeout","container","SyntaxError","containerElement","margin","defaultXCenter","defaultYCenter","xLabel","yLabel","zLabel","passValueFn","xValueLabel","yValueLabel","zValueLabel","filterLabel","legendLabel","STYLE","DOT","showPerspective","showGrid","keepAspectRatio","showShadow","showGrayBottom","showTooltip","verticalRatio","animationInterval","animationPreload","camera","eye","dataPoints","colX","colY","colZ","colValue","colFilter","xMin","xStep","xMax","yMin","yStep","yMax","zMin","zStep","zMax","valueMin","valueMax","xBarWidth","yBarWidth","colorAxis","colorGrid","colorDot","colorDotBorder","getMouseX","clientX","targetTouches","getMouseY","clientY","Emitter","_setScale","scale","z","xCenter","yCenter","zCenter","setArmLocation","_convert3Dto2D","point3d","translation","_convertPointToTranslation","_convertTranslationToScreen","ax","ay","az","cx","getCameraLocation","cy","cz","sinTx","sin","getCameraRotation","cosTx","cos","sinTy","cosTy","sinTz","cosTz","dx","dy","dz","bx","by","ex","ey","ez","getArmLength","xcenter","frame","canvas","clientWidth","ycenter","_setBackgroundColor","backgroundColor","fill","stroke","strokeWidth","borderColor","borderWidth","borderStyle","BAR","BARCOLOR","BARSIZE","DOTLINE","DOTCOLOR","DOTSIZE","GRID","LINE","SURFACE","_getStyleNumber","styleName","_determineColumnIndexes","counter","column","getDistinctValues","distinctValues","getColumnRange","minMax","_dataInitialize","rawData","_onChange","dataFilter","setOnLoadCallback","redraw","withBars","defaultXBarWidth","dataX","defa
ultYBarWidth","dataY","xRange","defaultXMin","defaultXMax","defaultXStep","yRange","defaultYMin","defaultYMax","defaultYStep","zRange","defaultZMin","defaultZMax","defaultZStep","valueRange","defaultValueMin","defaultValueMax","_getDataPoints","obj","sortNumber","dataMatrix","xIndex","yIndex","trans","screen","bottom","pointRight","pointTop","pointCross","hasChildNodes","firstChild","position","overflow","noCanvas","fontWeight","padding","innerHTML","onmousedown","_onMouseDown","ontouchstart","_onTouchStart","onmousewheel","_onWheel","ontooltip","_onTooltip","onkeydown","setSize","_resizeCanvas","clientHeight","animationStart","slider","play","animationStop","stop","_resizeCenter","charAt","parseFloat","setCameraPosition","pos","horizontal","vertical","setArmRotation","distance","setArmLength","getCameraPosition","getArmRotation","_readData","_redrawFilter","animationAutoStart","cameraPosition","styleNumber","tooltip","showAnimationControls","_redrawSlider","_redrawClear","_redrawAxis","_redrawDataGrid","_redrawDataLine","_redrawDataBar","_redrawDataDot","_redrawInfo","_redrawLegend","ctx","getContext","clearRect","widthMin","widthMax","dotSize","right","lineWidth","font","ymin","ymax","_hsv2rgb","strokeStyle","beginPath","moveTo","lineTo","strokeRect","fillStyle","closePath","gridLineLen","step","getCurrent","next","textAlign","textBaseline","fillText","label","visible","setValues","setPlayInterval","onchange","getIndex","selectValue","setOnChangeCallback","lineStyle","getLabel","getSelectedValue","from","to","prettyStep","text","xText","yText","zText","offset","xOffset","yOffset","xMin2d","xMax2d","gridLenX","gridLenY","textMargin","armAngle","H","S","V","R","G","B","C","Hi","X","abs","cross","topSideVisible","zAvg","transBottom","dist","sortDepth","aDiff","subtract","bDiff","crossproduct","crossProduct","radius","arc","PI","j","surface","corners","xWidth","yWidth","surfaces","center","avg","transCenter","diff","leftButtonDown","_onMouseUp","which","button","touchDown","startMouseX","startMouseY","startStart","startEnd","startArmRotation","cursor","onmousemove","_onMouseMove","onmouseup","diffX","diffY","horizontalNew","verticalNew","snapAngle","snapValue","round","parameters","emit","boundingRect","mouseX","mouseY","tooltipTimeout","_hideTooltip","dataPoint","_dataPointFromXY","_showTooltip","ontouchmove","_onTouchMove","ontouchend","_onTouchEnd","delta","wheelDelta","detail","oldLength","newLength","_insideTriangle","triangle","sign","as","bs","cs","distMax","closestDataPoint","closestDist","triangle1","triangle2","distX","distY","sqrt","content","line","dot","dom","borderRadius","boxShadow","borderLeft","contentWidth","offsetWidth","contentHeight","offsetHeight","lineHeight","dotWidth","dotHeight","armLocation","armRotation","armLength","cameraLocation","cameraRotation","calculateCameraOrientation","rot","graph","onLoadCallback","loadInBackground","isLoaded","getLoadedProgress","getColumn","getValues","dataView","progress","sub","sum","prev","bar","MozBorderRadius","slide","onclick","togglePlay","onChangeCallback","playTimeout","playInterval","playLoop","setIndex","playNext","interval","clearInterval","getPlayInterval","setPlayLoop","doLoop","onChange","indexToLeft","startClientX","startSlideX","leftToIndex","_start","_end","_step","precision","_current","setRange","setStep","calculatePrettyStep","log10","log","LN10","step1","pow","step2","step5","toPrecision","getStep","groups","forthArgument","defaultOptions","autoResize","orientation","maxHeight","minHeight","_create","body","domPro
ps","emitter","bind","hiddenDates","snap","toScreen","_toScreen","toGlobalScreen","_toGlobalScreen","toTime","_toTime","toGlobalTime","_toGlobalTime","range","timeAxis","currentTime","customTime","itemSet","itemsData","groupsData","setGroups","setItems","Core","newDataSet","initialLoad","dataRange","_getDataRange","setWindow","animate","fit","setSelection","focus","getSelection","itemData","e","getItemRange","dataset","minItem","maxStartItem","maxEndItem","linegraph","getLegend","groupId","isGroupVisible","visibility","convertHiddenOptions","repeat","dateItem","updateHiddenDates","centerContainer","totalRange","pixelTime","startDate","endDate","_d","runUntil","clone","day","dayOfYear","year","dayOffset","date","month","console","removeDuplicates","startHidden","isHidden","endHidden","rangeStart","rangeEnd","hidden","startToFront","endToFront","_applyRange","safeDates","printDates","dates","stepOverHiddenDates","timeStep","previousTime","stepInHidden","currentValue","current","newValue","switchedYear","switchedMonth","switchedDay","time","conversion","getHiddenDurationBetween","correctTimeForHidden","hiddenDuration","totalDuration","partialDuration","accumulatedHiddenDuration","getAccumulatedHiddenDuration","newTime","getHiddenDurationBefore","timeOffset","requiredDuration","previousPoint","snapAwayFromHidden","direction","correctionEnabled","minimumStep","containerHeight","customRange","alignZeros","autoScale","stepIndex","marginStart","marginEnd","deadSpace","majorSteps","minorSteps","setMinimumStep","setFirst","safeSize","minimumStepValue","orderOfMagnitude","minorStepIdx","magnitudefactor","solutionFound","stepSize","niceStart","niceEnd","roundToMinor","marginRange","rounded","hasNext","previous","decimals","exp","cnt","isMajor","now","hours","minutes","seconds","milliseconds","deltaDifference","scaleOffset","moveable","zoomable","zoomMin","zoomMax","touch","animateTimer","_onDragStart","_onDrag","_onDragEnd","_onHold","_onMouseWheel","_onTouch","_onPinch","validateDirection","getPointer","byUser","_cancelAnimation","initStart","initEnd","initTime","anyChanged","dragging","done","changed","newStart","newEnd","getRange","totalHidden","previousDelta","allowDragging","deltaX","deltaY","diffRange","safeStart","safeEnd","pointer","pageX","pageY","pointerDate","_pointerToDate","zoom","centerDate","hiddenDurationBefore","hiddenDurationAfter","move","EPSILON","orderByStart","orderByEnd","aTime","bTime","force","iMax","axis","collidingItem","jj","collision","nostack","subgroups","newTop","subgroup","format","FORMAT","minorLabels","millisecond","second","minute","hour","weekday","majorLabels","setFormat","defaultFormat","first","setFullYear","getFullYear","setMonth","setDate","setHours","setMinutes","setSeconds","setMilliseconds","getMilliseconds","getSeconds","getMinutes","getHours","getDate","getMonth","setScale","newScale","newStep","setAutoScale","enable","stepYear","stepMonth","stepDay","stepHour","stepMinute","stepSecond","stepMillisecond","getLabelMinor","getLabelMajor","getClassName","even","today","isSame","currentWeek","currentMonth","currentYear","locale","lang","toLowerCase","_isResized","resized","_previousWidth","_previousHeight","showCurrentTime","locales","parent","backgroundVertical","title","toUpperCase","substring","currentTimeTimer","setCurrentTime","getCurrentTime","showCustomTime","eventParams","drag","setCustomTime","getCustomTime","stopPropagation","svg","linegraphOptions","showMinorLabels","showMajorLabels","icons","majorLinesOffset","minorLinesOffset","labelOffsetX","labe
lOffsetY","iconWidth","linegraphSVG","DOMelements","lines","labels","conversionFactor","minWidth","stepPixels","stepPixelsForced","zeroCrossing","lineOffset","master","svgElements","iconsRemoved","amountOfGroups","lineContainer","scrollTop","addGroup","graphOptions","updateGroup","removeGroup","hide","show","display","_redrawGroupIcons","iconHeight","iconOffset","drawIcon","_cleanupIcons","backgroundHorizontal","activeGroups","_calculateCharSize","minorLabelHeight","minorCharHeight","majorLabelHeight","majorCharHeight","minorLineWidth","minorLineHeight","majorLineWidth","majorLineHeight","_redrawLabels","_redrawTitle","amountOfSteps","stepDifference","zeroStepDifference","valueAtZero","marginStartPos","maxLabelSize","_redrawLabel","_redrawLine","titleWidth","titleCharHeight","convertValue","invertedValue","convertedValue","characterHeight","largestWidth","majorCharWidth","minorCharWidth","textMinor","createTextNode","measureCharMinor","textMajor","measureCharMajor","textTitle","measureCharTitle","titleCharWidth","groupsUsingDefaultStyles","usingDefaultStyle","zeroPosition","Line","Bar","Points","setZeroPosition","catmullRom","parametrization","alpha","SVGcontainer","path","fillPath","fillHeight","outline","shaded","barWidth","bar1Height","bar2Height","icon","yAxisOrientation","getYRange","groupData","draw","framework","subgroupIndex","subgroupOrderer","subgroupOrder","visibleItems","byStart","byEnd","checkRangedItems","inner","foreground","marker","Element","getLabelWidth","restack","_updateVisibleItems","markerHeight","lastMarkerHeight","dirty","displayed","_calculateHeight","offsetTop","offsetLeft","ii","repositionY","resetSubgroups","labelSet","setParent","orderSubgroups","_checkIfVisible","sortArray","sortField","removeFromDataSet","removeItem","startArray","endArray","oldVisibleItems","visibleItemsLookup","lowerBound","upperBound","_checkIfVisibleWithReference","initialPosByStart","_traceVisible","initialPosByEnd","repositionX","initialPos","breakCondition","isVisible","align","groupOrder","selectable","editable","updateTime","onAdd","onUpdate","onMove","onRemove","onMoving","itemOptions","itemListeners","_onAdd","_onUpdate","_onRemove","groupListeners","_onAddGroups","_onUpdateGroups","_onRemoveGroups","groupIds","selection","stackDirty","touchParams","UNGROUPED","BACKGROUND","box","_updateUngrouped","backgroundGroup","isFirst","_onSelectItem","_onMultiSelectItem","_onAddItem","addCallback","Function","markDirty","unselect","select","getVisibleItems","rawVisibleItems","_deselect","_orderGroups","visibleInterval","zoomed","lastVisibleInterval","lastWidth","firstGroup","_firstGroup","firstMargin","nonFirstMargin","groupMargin","groupResized","firstGroupIndex","firstGroupId","ungrouped","_getGroupId","getLabelSet","oldItemsData","getItems","_order","getGroups","_getType","_removeItem","groupOptions","oldGroupId","oldGroup","_constructByEndArray","itemFromTarget","dragLeftItem","dragRightItem","itemProps","selected","initialX","newProps","initial","groupFromTarget","_updateItemProps","_moveToGroup","changes","ctrlKey","srcEvent","shiftKey","oldSelection","newSelection","xAbs","newItem","_getItemRange","_item","itemSetFromTarget","side","iconSize","iconSpacing","textArea","scrollableHeight","drawLegendIcons","getComputedStyle","paddingTop","defaultGroup","sampling","graphHeight","barChart","handleOverlap","dataAxis","legend","abortedGraphUpdate","updateSVGheight","updateSVGheightOnResize","lastStart","COUNTER","BarGraphFunctions","yAxisLeft","yAxisRight","legendLeft","legendRight","_update
AllGroupData","_updateGroup","groupsContent","ungroupedCounter","forceGraphUpdate","_updateGraph","rangePerPixelInv","preprocessedGroupData","processedGroupData","groupRanges","changeCalled","minDate","maxDate","_getRelevantData","_applySampling","_convertXcoordinates","_getYRanges","_updateYAxis","MAX_CYCLES","_convertYcoordinates","dataContainer","guess","increment","amountOfPoints","xDistance","pointsPerPixel","ceil","sampledData","barCombinedDataLeft","barCombinedDataRight","getStackedBarYRange","minVal","maxVal","yAxisLeftUsed","yAxisRightUsed","minLeft","minRight","maxLeft","maxRight","ignore","_toggleAxisVisiblity","drawIcons","axisUsed","datapoints","xValue","yValue","extractedData","svgHeight","majorTexts","minorTexts","lineTop","parentChanged","foregroundNextSibling","nextSibling","backgroundNextSibling","_repaintLabels","timeLabelsize","cur","prevLine","xPrev","xFirstMajorLabel","_repaintMinorText","_repaintMajorText","_repaintMajorLine","_repaintMinorLine","leftTime","leftText","widthText","arr","pop","childNodes","nodeValue","_repaintDeleteButton","anchor","deleteButton","_updateContents","template","_updateTitle","removeAttribute","_updateDataAttributes","dataAttributes","attributes","setAttribute","_updateStyle","emptyContent","baseClassName","onTop","itemSubgroup","itemSetHeight","marginLeft","maxWidth","_repaintDragLeft","_repaintDragRight","contentLeft","parentWidth","boxWidth","dragLeft","dragRight","_determineBrowserMethod","_initializeMixinLoaders","renderRefreshRate","renderTimestep","renderTime","physicsTime","runDoubleSpeed","physicsDiscreteStepsize","initializing","triggerFunctions","edit","editEdge","connect","del","nodes","mass","radiusMin","radiusMax","shape","image","fontColor","fontSize","fontFace","fontFill","fontStrokeWidth","fontStrokeColor","level","borderWidthSelected","edges","widthSelectionMultiplier","hoverWidth","labelAlignment","arrowScaleFactor","dash","gap","altLength","inheritColor","configurePhysics","physics","barnesHut","thetaInverted","gravitationalConstant","centralGravity","springLength","springConstant","damping","repulsion","nodeDistance","hierarchicalRepulsion","clustering","initialMaxNodes","clusterThreshold","reduceToNodes","chainThreshold","clusterEdgeThreshold","sectorThreshold","screenSizeThreshold","fontSizeMultiplier","maxFontSize","forceAmplification","distanceAmplification","edgeGrowth","nodeScaling","maxNodeSizeIncrements","activeAreaBoxSize","clusterLevelDifference","navigation","keyboard","speed","dataManipulation","initiallyVisible","hierarchicalLayout","levelSeparation","nodeSpacing","layout","freezeForStabilization","smoothCurves","dynamic","roundness","maxVelocity","minVelocity","stabilize","stabilizationIterations","zoomExtentOnStabilize","dragNetwork","dragNodes","hideEdgesOnDrag","hideNodesOnDrag","constants","pixelRatio","hoverObj","controlNodesActive","navigationHammers","existing","_new","animationSpeed","animationEasingFunction","easingTime","sourceScale","targetScale","sourceTranslation","targetTranslation","lockedOnNodeId","lockedOnNodeOffset","touchTime","images","setOnloadCallback","_redraw","xIncrement","yIncrement","zoomIncrement","_loadPhysicsSystem","_loadSectorSystem","_loadClusterSystem","_loadSelectionSystem","_loadHierarchySystem","_setTranslation","freezeSimulation","cachedFunctions","startedStabilization","stabilized","draggingNodes","calculationNodes","calculationNodeIndices","nodeIndices","canvasTopLeft","canvasBottomRight","pointerPosition","areaCenter","previousScale","nodesData","edgesData","nodesL
isteners","_addNodes","_updateNodes","_removeNodes","edgesListeners","_addEdges","_updateEdges","_removeEdges","moving","timer","_setupHierarchicalLayout","zoomExtent","startWithClustering","keycharm","hammerUtil","MixinLoader","Activator","browserType","requiresTimeout","_getScriptPath","scripts","getElementsByTagName","src","_getRange","node","minY","maxY","minX","maxX","nodeId","boundingBox","_findCenter","animationOptions","initialZoom","disableStart","zoomLevel","numberOfNodes","factor","yDistance","xZoomLevel","yZoomLevel","animation","_updateNodeIndexList","_clearNodeIndexList","idx","_createManipulatorBar","dotData","DOTToGraph","gephi","gephiData","parseGephi","_setNodes","_setEdges","_putDataInSector","_resetLevels","_stabilize","onEdit","onEditEdge","onConnect","onDelete","editMode","newColorObj","groupname","clickToUse","activator","_createKeyBinds","_loadNavigationControls","_loadManipulationSystem","_configureSmoothCurves","devicePixelRatio","webkitBackingStorePixelRatio","mozBackingStorePixelRatio","msBackingStorePixelRatio","oBackingStorePixelRatio","backingStorePixelRatio","setTransform","pinch","set","_onTap","_onDoubleTap","onTouch","_onMouseMoveTitle","reset","isActive","_moveUp","_yStopMoving","_moveDown","_moveLeft","_xStopMoving","_moveRight","_zoomIn","_stopZoom","_zoomOut","_deleteSelected","_cleanupPhysicsConfiguration","_recursiveDOMDelete","DOMobject","_getPointer","pinched","_getScale","_handleTouch","_handleDragStart","_getNodeAt","_getTranslation","isSelected","_selectObject","nodeIds","objectId","selectionObj","xFixed","yFixed","_handleOnDrag","releaseNode","_XconvertDOMtoCanvas","_XconvertCanvasToDOM","_YconvertDOMtoCanvas","_YconvertCanvasToDOM","_handleDragEnd","_handleTap","_handleDoubleTap","_handleOnHold","_onRelease","_handleOnRelease","_zoom","scaleOld","preScaleDragPointer","DOMtoCanvas","scaleFrac","tx","ty","updateClustersDefault","postScaleDragPointer","canvasToDOM","popupObj","_checkHidePopup","checkShow","_checkShowPopup","popupTimer","edgeId","_getEdgeAt","_hoverObject","_blurObject","lastPopupNode","nodeUnderCursor","overlappingNodes","isOverlappingWith","getTitle","overlappingEdges","edge","connected","popup","setPosition","setText","emitEvent","oldWidth","oldHeight","oldNodesData","_updateSelection","angle","_updateCalculationNodes","_reconnectEdges","_updateValueRange","updateLabels","changedData","setProperties","properties","oldEdgesData","oldEdge","disconnect","showInternalIds","_createBezierNodes","via","sectors","dynamicEdges","setValueRange","w","save","translate","_doInAllSectors","restore","offsetX","offsetY","_drawNodes","alwaysShow","setScaleAndPos","inArea","sMax","_drawEdges","_drawControlNodes","_freezeDefinedNodes","_physicsTick","_restoreFrozenNodes","fixedData","_isMoving","vmin","isMoving","_discreteStepNodes","nodesPresent","discreteStepLimited","discreteStep","vminCorrected","_revertPhysicsState","revertPosition","_revertPhysicsTick","_doInAllActiveSectors","_doInSupportSector","mainMovingStatus","supportMovingStatus","mainMoving","_animationStep","_handleNavigation","startTime","renderStartTime","requestAnimationFrame","mozRequestAnimationFrame","webkitRequestAnimationFrame","msRequestAnimationFrame","iterations","toggleFreeze","parentEdgeId","internalMultiplier","positionBezierNode","mixin","storePosition","storePositions","dataArray","allowedToMoveX","allowedToMoveY","getPositions","focusOnNode","nodePosition","lockedOnNode","easingFunction","animateView","locked","_transitionRedraw","viewCenter","distanceFromCenter","
_classicRedraw","_lockedRedraw","active","getScale","getCenterCoordinates","getBoundingBox","networkConstants","fromId","toId","widthSelected","labelDimensions","yLine","dirtyLabel","fromBackup","toBackup","originalFromId","originalToId","widthFixed","lengthFixed","controlNodesEnabled","controlNodes","positions","connectedNode","_drawLine","_drawArrow","_drawArrowCenter","_drawDashLine","attachEdge","detachEdge","xFrom","yFrom","xTo","yTo","xObj","yObj","_getDistanceToEdge","_getColor","colorObj","_getLineWidth","_line","midpointX","midpointY","_pointOnLine","_label","resize","_circle","_pointOnCircle","networkScaleInv","_getViaCoordinates","xVia","yVia","quadraticCurveTo","lineCount","measureText","_rotateForLabelAlignment","_drawLabelRect","_drawLabelText","angleInDegrees","atan2","rotate","lineMargin","fillRect","lineJoin","strokeText","setLineDash","pattern","lineDashOffset","lineCap","dashedLine","percentage","arrow","_pointOnBezier","_findBorderPosition","distanceToBorder","distanceToNodes","difference","threshold","arrowPos","guidePos","edgeSegmentLength","toBorderDist","toBorderPoint","x1","y1","x2","y2","x3","y3","lastX","lastY","minDistance","_getDistanceToLine","px","py","something","u","nodeIdFrom","nodeIdTo","getControlNodeFromPosition","getControlNodeToPosition","_enableControlNodes","_disableControlNodes","_getSelectedControlNode","fromDistance","toDistance","_restoreControlNodes","controlnodeFromPos","fromBorderDist","fromBorderPoint","controlnodeToPos","defaultIndex","DEFAULT","imageBroken","load","url","brokenUrl","img","Image","onload","onerror","error","imagelist","grouplist","reroutedEdges","fontDrawThreshold","horizontalAlignLeft","verticalAlignTop","baseRadiusValue","radiusFixed","preassignedLevel","hierarchyEnumerated","fx","fy","vx","vy","previousState","resetCluster","dynamicEdgesLength","clusterSession","clusterSizeWidthFactor","clusterSizeHeightFactor","clusterSizeRadiusFactor","growthIndicator","networkScale","formationScale","clusterSize","containedNodes","containedEdges","clusterSessions","originalLabel","triggerFunction","groupObj","imageObj","brokenImage","_drawDatabase","_resizeDatabase","_drawBox","_resizeBox","_drawCircle","_resizeCircle","_drawEllipse","_resizeEllipse","_drawImage","_resizeImage","_drawCircularImage","_resizeCircularImage","_drawText","_resizeText","_drawDot","_resizeShape","_drawSquare","_drawTriangle","_drawTriangleDown","_drawStar","_reset","clearSizeCache","_setForce","_addForce","storeState","isFixed","velocity","getDistance","_drawImageAtPosition","globalAlpha","drawImage","_drawImageLabel","getTextSize","_swapToImageResizeWhenImageLoaded","diameter","centerX","centerY","_drawRawCircle","circle","clip","textSize","clusterLineWidth","selectionLineWidth","roundRect","database","defaultSize","ellipse","_drawShape","radiusMultiplier","baseline","labelUnderNode","inView","clearVelocity","updateVelocity","massBeforeClustering","energyBefore","styleAttr","fontFamily","WebkitBorderRadius","whiteSpace","parseDOT","parseGraph","nextPreview","isAlphaNumeric","regexAlphaNumeric","merge","o","addNode","graphs","attr","addEdge","createEdge","getToken","tokenType","TOKENTYPE","NULL","token","isComment","DELIMITER","c2","DELIMITERS","IDENTIFIER","newSyntaxError","UNKNOWN","chop","strict","parseStatements","parseStatement","subgraph","parseSubgraph","parseEdge","parseAttributeStatement","parseNodeStatement","subgraphs","parseAttributeList","message","maxLength","forEach2","array1","array2","elem1","elem2","graphData","dotNode","graphNode","convertE
dge","dotEdge","graphEdge","subEdge","{","}","[","]",";","=",",","->","--","gephiJSON","allowedToMove","gEdges","gNodes","gEdge","source","gNode","propagating","onMouseWheel","leftContainer","rightContainer","shadowTop","shadowBottom","shadowTopLeft","shadowBottomLeft","shadowTopRight","shadowBottomRight","_redrawTimer","touchAction","listeners","events","scrollTopMin","redrawCount","_initAutoResize","component","_stopAutoResize","what","getWindow","borderRootHeight","borderRootWidth","autoHeight","centerWidth","_updateScrollTop","visibilityTop","visibilityBottom","MAX_REDRAWS","repaint","_startAutoResize","_onResize","lastHeight","watchTimer","setInterval","_setScrollTop","_getScrollTop","inputHandler","onRelease","isFinal","offTouch","offRelease","custom","back","editNode","addDescription","edgeDescription","editEdgeDescription","createEdgeError","deleteClusterError","CanvasRenderingContext2D","square","s2","ir","triangleDown","star","n","r2d","kappa","ox","oy","xe","ye","xm","ym","bezierCurveTo","wEllipse","hEllipse","ymb","yeb","xt","yt","xi","yi","xl","yl","xr","yr","dashArray","dashLength","dashCount","slope","distRemaining","dashIndex","_catmullRom","_linear","dFill","_catmullRomUniform","p0","p1","p2","p3","bp1","bp2","normalization","d1","d2","d3","A","N","M","d3powA","d2powA","d3pow2A","d2pow2A","d1pow2A","d1powA","Bargraph","barCombinedData","coreDistance","drawData","combinedData","intersections","barPoints","_getDataIntersections","heightOffset","_getSafeDrawData","nextKey","amount","resolved","prevKey","accumulated","groupLabel","_getStackedBarYRange","xpos","PhysicsMixin","ClusterMixin","SectorsMixin","SelectionMixin","ManipulationMixin","NavigationMixin","HierarchicalLayoutMixin","_loadMixin","sourceVariable","mixinFunction","_clearMixin","_loadSelectedForceSolver","_loadPhysicsConfiguration","hubThreshold","activeSector","drawingNode","blockConnectingEdgeSelection","forceAppendSelection","manipulationDiv","editModeDiv","closeDiv","_cleanNavigation","_loadNavigationElements","overlay","prevent_default","_onTapOverlay","windowHammer","_hasParent","deactivate","escListener","activate","unbind","_callbacks","once","self","removeListener","removeAllListeners","callbacks","cb","hasListeners","__WEBPACK_AMD_DEFINE_RESULT__","global","dfl","hasOwnProp","defaultParsingFlags","empty","unusedTokens","unusedInput","charsLeftOver","nullInput","invalidMonth","invalidFormat","userInvalidated","iso","printMsg","msg","suppressDeprecationWarnings","warn","deprecate","firstTime","deprecateSimple","deprecations","padToken","func","leftZeroFill","ordinalizeToken","period","localeData","ordinal","monthDiff","anchor2","adjust","wholeMonthDiff","meridiemFixWrap","meridiem","isPm","meridiemHour","isPM","Locale","Moment","config","skipOverflow","checkOverflow","copyConfig","updateInProgress","updateOffset","Duration","normalizedInput","normalizeObjectUnits","years","quarters","quarter","months","weeks","week","days","_milliseconds","_days","_months","_locale","_bubble","val","_isAMomentObject","_i","_f","_l","_strict","_tzm","_isUTC","_offset","_pf","momentProperties","absRound","number","targetLength","forceSign","output","positiveMomentsDifference","base","res","isAfter","momentsDifference","makeAs","isBefore","createAdder","dur","tmp","addOrSubtractDurationFromMoment","mom","isAdding","setTime","rawSetter","rawGetter","rawMonthSetter","input","compareArrays","dontConvert","lengthDiff","diffs","toInt","normalizeUnits","units","lowered","unitAliases","camelFunctions","inputObject","normalizedProp",
"makeList","setter","getter","results","utc","argumentForCoercion","coercedNumber","isFinite","daysInMonth","UTC","getUTCDate","weeksInYear","dow","doy","weekOfYear","daysInYear","isLeapYear","_a","MONTH","DATE","YEAR","HOUR","MINUTE","SECOND","MILLISECOND","_overflowDayOfYear","isValid","_isValid","getTime","bigHour","normalizeLocale","chooseLocale","names","loadLocale","oldLocale","hasModule","code","model","local","removeFormattingTokens","makeFormatFunction","formattingTokens","formatTokenFunctions","formatMoment","expandFormat","formatFunctions","invalidDate","replaceLongDateFormatTokens","longDateFormat","localFormattingTokens","lastIndex","getParseRegexForToken","parseTokenOneDigit","parseTokenThreeDigits","parseTokenFourDigits","parseTokenOneToFourDigits","parseTokenSignedNumber","parseTokenSixDigits","parseTokenOneToSixDigits","parseTokenTwoDigits","parseTokenOneToThreeDigits","parseTokenWord","_meridiemParse","parseTokenOffsetMs","parseTokenTimestampMs","parseTokenTimezone","parseTokenT","parseTokenDigits","parseTokenOneOrTwoDigits","_ordinalParse","_ordinalParseLenient","RegExp","regexpEscape","unescapeFormat","utcOffsetFromString","string","possibleTzMatches","tzChunk","parseTimezoneChunker","addTimeToArrayFromToken","datePartArray","monthsParse","_dayOfYear","parseTwoDigitYear","_meridiem","_useUTC","weekdaysParse","_w","invalidWeekday","dayOfYearFromWeekInfo","weekYear","temp","GG","W","E","_week","gg","dayOfYearFromWeeks","dateFromConfig","currentDate","yearToUse","currentDateArray","makeUTCDate","getUTCMonth","_nextDay","makeDate","setUTCMinutes","getUTCMinutes","dateFromObject","getUTCFullYear","makeDateFromStringAndFormat","ISO_8601","parseISO","parsedInput","tokens","skipped","stringLength","totalParsedInputLength","matched","p4","makeDateFromStringAndArray","tempConfig","bestMoment","scoreToBeat","currentScore","NaN","score","l","isoRegex","isoDates","isoTimes","makeDateFromString","createFromInputFallback","makeDateFromInput","aspNetJsonRegex","ms","setUTCFullYear","parseWeekday","substituteTimeAgo","withoutSuffix","isFuture","relativeTime","posNegDuration","relativeTimeThresholds","firstDayOfWeek","firstDayOfWeekOfYear","adjustedMoment","daysToDayOfWeek","daysToAdd","getUTCDay","makeMoment","invalid","preparse","pickBy","moments","dayOfMonth","unit","makeAccessor","keepTime","daysToYears","yearsToDays","makeDurationGetter","makeGlobal","shouldDeprecate","ender","oldGlobalMoment","globalScope","VERSION","aspNetTimeSpanJsonRegex","isoDurationRegex","isoFormat","unitMillisecondFactors","Milliseconds","Seconds","Minutes","Hours","Days","Months","Years","D","Q","DDD","dayofyear","isoweekday","isoweek","weekyear","isoweekyear","ordinalizeTokens","paddedTokens","MMM","monthsShort","MMMM","dd","weekdaysMin","ddd","weekdaysShort","dddd","weekdays","isoWeek","YY","YYYY","YYYYY","YYYYYY","gggg","ggggg","isoWeekYear","GGGG","GGGGG","isoWeekday","SS","SSS","SSSS","Z","utcOffset","ZZ","zoneAbbr","zz","zoneName","unix","lists","DDDD","_monthsShort","monthName","regex","_monthsParse","_longMonthsParse","_shortMonthsParse","_weekdays","_weekdaysShort","_weekdaysMin","weekdayName","_weekdaysParse","_longDateFormat","LTS","LT","L","LL","LLL","LLLL","isLower","_calendar","sameDay","nextDay","nextWeek","lastDay","lastWeek","sameElse","calendar","_relativeTime","future","past","mm","hh","MM","yy","pastFuture","_ordinal","postformat","firstDayOfYear","_invalidDate","ret","parseIso","diffRes","isDuration","inp","version","relativeTimeThreshold","limit","defineLocale","_abbr","abbr","langData"
,"flags","parseZone","isDSTShifted","parsingFlags","invalidAt","keepLocalTime","_dateUtcOffset","inputString","asFloat","that","zoneDiff","humanize","fromNow","sod","startOf","isDST","getDay","endOf","inputMs","isBetween","zone","localAdjust","_changeInProgress","isLocal","isUtcOffset","isUtc","hasAlignedHourOffset","isoWeeksInYear","weekInfo","newLocaleData","getTimezoneOffset","isoWeeks","toJSON","isUTC","withSuffix","toIsoString","asSeconds","asMilliseconds","asMinutes","asHours","asDays","asWeeks","asMonths","asYears","ordinalParse","require","noGlobal","__WEBPACK_AMD_DEFINE_FACTORY__","__WEBPACK_AMD_DEFINE_ARRAY__","_exportFunctions","_bound","keydown","keyup","_keys","fromCharCode","down","handleEvent","up","keyCode","bound","bindAll","getKey","newBindings","_firstTarget","propagatedHandler","_handled","stopped","firstTarget","_handlers","Manager","PropagatingHammer","_on","_off","_destroy","handler","exportName","setTimeoutContext","timeout","bindFn","invokeArrayArg","arg","each","iterator","dest","inherit","child","childP","baseP","_super","boolOrFn","TYPE_FUNCTION","ifUndefined","val1","val2","addEventListeners","splitStr","removeEventListeners","hasParent","inStr","str","find","inArray","findByKey","uniqueArray","prefixed","property","prefix","camelProp","VENDOR_PREFIXES","uniqueId","_uniqueId","getWindowForElement","doc","ownerDocument","defaultView","parentWindow","Input","manager","inputTarget","domHandler","ev","init","createInputInstance","Type","inputClass","SUPPORT_POINTER_EVENTS","PointerEventInput","SUPPORT_ONLY_TOUCH","TouchInput","SUPPORT_TOUCH","TouchMouseInput","MouseInput","eventType","pointersLen","pointers","changedPointersLen","changedPointers","INPUT_START","INPUT_END","INPUT_CANCEL","session","computeInputData","recognize","prevInput","pointersLength","firstInput","simpleCloneInputData","firstMultiple","offsetCenter","getCenter","timeStamp","deltaTime","getAngle","computeDeltaXY","offsetDirection","getDirection","rotation","getRotation","computeIntervalInputData","offsetDelta","prevDelta","velocityX","velocityY","last","lastInterval","COMPUTE_INTERVAL","getVelocity","DIRECTION_NONE","DIRECTION_LEFT","DIRECTION_RIGHT","DIRECTION_UP","DIRECTION_DOWN","PROPS_XY","PROPS_CLIENT_XY","evEl","MOUSE_ELEMENT_EVENTS","evWin","MOUSE_WINDOW_EVENTS","allow","pressed","POINTER_ELEMENT_EVENTS","POINTER_WINDOW_EVENTS","store","pointerEvents","SingleTouchInput","evTarget","SINGLE_TOUCH_TARGET_EVENTS","SINGLE_TOUCH_WINDOW_EVENTS","started","normalizeSingleTouches","all","touches","changedTouches","TOUCH_TARGET_EVENTS","targetIds","getTouches","allTouches","INPUT_MOVE","identifier","changedTargetTouches","mouse","TouchAction","cleanTouchActions","actions","TOUCH_ACTION_NONE","hasPanX","TOUCH_ACTION_PAN_X","hasPanY","TOUCH_ACTION_PAN_Y","TOUCH_ACTION_MANIPULATION","TOUCH_ACTION_AUTO","Recognizer","defaults","state","STATE_POSSIBLE","simultaneous","requireFail","stateStr","STATE_CANCELLED","STATE_ENDED","STATE_CHANGED","STATE_BEGAN","directionStr","getRecognizerByNameIfManager","otherRecognizer","recognizer","AttrRecognizer","PanRecognizer","pX","pY","PinchRecognizer","PressRecognizer","_timer","_input","RotateRecognizer","SwipeRecognizer","TapRecognizer","pTime","pCenter","recognizers","preset","handlers","toggleCssProps","recognizeWith","requireFailure","cssProps","triggerDomEvent","gestureEvent","createEvent","initEvent","gesture","dispatchEvent","TEST_ELEMENT","MOBILE_REGEX","INPUT_TYPE_TOUCH","INPUT_TYPE_PEN","INPUT_TYPE_MOUSE","INPUT_TYPE_KINECT","DIRECTION_HORIZONTAL","DIRECTI
ON_VERTICAL","DIRECTION_ALL","MOUSE_INPUT_MAP","mousedown","mousemove","mouseup","pointerType","POINTER_INPUT_MAP","pointerdown","pointermove","pointerup","pointercancel","pointerout","IE10_POINTER_TYPE_ENUM",2,3,4,5,"MSPointerEvent","removePointer","eventTypeNormalized","isTouch","storeIndex","pointerId","SINGLE_TOUCH_INPUT_MAP","touchstart","touchmove","touchend","touchcancel","TOUCH_INPUT_MAP","inputEvent","inputData","isMouse","PREFIXED_TOUCH_ACTION","NATIVE_TOUCH_ACTION","TOUCH_ACTION_COMPUTE","compute","getTouchAction","preventDefaults","prevented","hasNone","preventSrc","STATE_RECOGNIZED","STATE_FAILED","dropRecognizeWith","dropRequireFailure","hasRequireFailures","canRecognizeWith","withState","tryEmit","canEmit","inputDataClone","process","attrTest","optionPointers","isRecognized","directionTest","hasMoved","inOut","validPointers","validMovement","validTime","taps","posThreshold","validTouchTime","failTimeout","validInterval","validMultiTap","tapCount","domEvents","userSelect","touchSelect","touchCallout","contentZooming","userDrag","tapHighlightColor","STOP","FORCED_STOP","curRecognizer","Tap","Pan","Swipe","Pinch","Rotate","Press","clusterToFit","maxNumberOfNodes","reposition","maxLevels","forceAggregateHubs","normalizeClusterLevels","increaseClusterLevel","repositionNodes","openCluster","isMovingBeforeClustering","_nodeInActiveArea","_sector","_addSector","decreaseClusterLevel","_expandClusterNode","_updateDynamicEdges","updateClusters","zoomDirection","recursive","doNotStart","amountOfNodes","_collapseSector","_formClusters","_openClusters","_openClustersBySize","_aggregateHubs","handleChains","chainPercentage","_getChainFraction","_reduceAmountOfChains","_getHubSize","_formClustersByHub","openAll","containedNodeId","childNode","_expelChildFromParent","_unselectAll","_releaseContainedEdges","_connectEdgeBackToChild","_validateEdges","othersPresent","childNodeId","_repositionBezierNodes","_formClustersByZoom","_forceClustersByZoom","minLength","_addToCluster","_clusterToSmallestNeighbour","smallestNeighbour","smallestNeighbourNode","neighbour","onlyEqual","_formClusterFromHub","hubNode","absorptionSizeOffset","allowCluster","edgesIdarray","amountOfInitialEdges","_addToContainedEdges","_connectEdgeToCluster","_containCircularEdgesFromNode","massBefore","correction","edgeToId","edgeFromId","k","_addToReroutedEdges","maxLevel","minLevel","clusterLevel","targetLevel","average","averageSquared","hubCounter","largestHub","variance","standardDeviation","fraction","reduceAmount","chains","total","_switchToSector","sectorId","sectorType","_switchToActiveSector","_switchToFrozenSector","_switchToSupportSector","_loadLatestSector","_previousSector","_setActiveSector","newId","_forgetLastSector","_createNewSector","_deleteActiveSector","_deleteFrozenSector","_freezeSector","_activateSector","_mergeThisWithFrozen","_collapseThisToSingleCluster","sector","unqiueIdentifier","previousSector","runFunction","argument","returnValues","_doInAllFrozenSectors","_drawSectorNodes","_drawAllSectorNodes","_getNodesOverlappingWith","_getAllNodesOverlappingWith","_pointerToPositionObject","positionObject","_getEdgesOverlappingWith","_getAllEdgesOverlappingWith","_addToSelection","_addToHover","_removeFromSelection","doNotTrigger","_unselectClusters","_getSelectedNodeCount","_getSelectedNode","_getSelectedEdge","_getSelectedEdgeCount","_getSelectedObjectCount","_selectionIsEmpty","_clusterInSelection","_selectConnectedEdges","_hoverConnectedEdges","_unselectConnectedEdges","append","highlightEdges","overrid
eSelectable","DOM","_manipulationReleaseOverload","_navigationReleaseOverload","getSelectedNodes","edgeIds","getSelectedEdges","idArray","selectNodes","RangeError","selectEdges","_clearManipulatorBar","manipulationDOM","_restoreOverloadedFunctions","functionName","_toggleEditMode","toolbar","boundFunction","edgeBeingEdited","selectedControlNode","_createAddNodeToolbar","_createAddEdgeToolbar","_editNode","_createEditEdgeToolbar","_addNode","_handleConnect","_finishConnect","_selectControlNode","_controlNodeDrag","_releaseControlNode","newNode","_editEdge","alert","supportNodes","targetNode","connectionEdge","connectFromId","_createEdge","defaultData","finalizedData","sourceNodeId","targetNodeId","selectedNodes","selectedEdges","navigationDivs","navigationDivActions","_stopMovement","_zoomExtent","hubsize","definedLevel","undefinedLevel","_changeConstants","_determineLevels","_determineLevelsDirected","distribution","_getDistribution","_placeNodesByHierarchy","minPos","_placeBranchNodes","maxCount","_setLevel","firstNode","_setLevelDirected","parentId","parentLevel","nodeMoved","_restoreNodes","graphToggleSmoothCurves","graph_toggleSmooth","getElementById","graphRepositionNodes","showValueOfRange","graphGenerateOptions","optionsSpecific","radioButton1","radioButton2","checked","backupConstants","optionsDiv","switchConfigurations","radioButton","querySelector","tableId","table","constantsVariableName","valueId","rangeValue","_overWriteGraphConstants","RepulsionMixin","HierarchialRepulsionMixin","BarnesHutMixin","_toggleBarnesHut","barnesHutTree","_initializeForceCalculation","_calculateForces","_calculateGravitationalForces","_calculateNodeForces","_calculateSpringForcesWithSupport","_calculateHierarchicalSpringForces","_calculateSpringForces","supportNodeId","gravity","gravityForce","edgeLength","springForce","combinedClusterSize","node1","node2","node3","_calculateSpringForce","physicsConfiguration","hierarchicalLayoutDirections","parentElement","rangeElement","radioButton3","graph_repositionNodes","graph_generateOptions","dynamicSmoothCurves","nameArray","webpackContext","req","resolve","repulsingForce","a_base","minimumDistance","steepness","springFx","springFy","totalFx","totalFy","correctionFx","correctionFy","nodeCount","_formBarnesHutTree","_getForceContribution","children","NW","NE","SW","SE","parentBranch","childrenCount","centerOfMass","calcSize","MAX_VALUE","sizeDiff","minimumTreeSize","rootSize","halfRootSize","_splitBranch","_placeInTree","_updateBranchMass","totalMass","totalMassInv","biggestSize","skipMassUpdate","_placeInRegion","region","containedNode","_insertRegion","childSize","_drawTree","_drawBranch","branch","webpackPolyfill","paths","__webpack_amd_options__"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;AAyBA,cAEA,SAA2CA,EAAMC,GAC1B,gBAAZC,UAA0C,gBAAXC,QACxCA,OAAOD,QAAUD,IACQ,kBAAXG,SAAyBA,OAAOC,IAC9CD,OAAOH,GACmB,gBAAZC,SACdA,QAAa,IAAID,IAEjBD,EAAU,IAAIC,KACbK,KAAM,WACT,MAAgB,UAAUC,GAKhB,QAASC,GAAoBC,GAG5B,GAAGC,EAAiBD,GACnB,MAAOC,GAAiBD,GAAUP,OAGnC,IAAIC,GAASO,EAAiBD,IAC7BP,WACAS,GAAIF,EACJG,QAAQ,EAUT,OANAL,GAAQE,GAAUI,KAAKV,EAAOD,QAASC,EAAQA,EAAOD,QAASM,GAG/DL,EAAOS,QAAS,EAGTT,EAAOD,QAvBf,GAAIQ,KAqCJ,OATAF,GAAoBM,EAAIP,EAGxBC,EAAoBO,EAAIL,EAGxBF,EAAoBQ,EAAI,GAGjBR,EAAoB,KAK/B,SAASL,EAAQD,EAASM,GAG9BN,EAAQe,KAAOT,EAAoB,GACnCN,EAAQgB,QAAUV,EAAoB,GAGtCN,EAAQiB,QAAUX,EAAoB,GACtCN,EAAQkB,SAAWZ,EAAoB,GACvCN,EAAQmB,MAAQb,EAAoB,GAGpCN,EAAQoB,QAAUd,EAAoB,GACtCN,EAAQqB,SACNC,OAAQhB,EAAoB,GAC5BiB,OAAQjB,EAAoB,GAC5BkB,QAASlB,EAAoB,GAC7BmB,QAASnB,EAAoB,IAC7BoB,OAAQpB,EAAoB,IAC5BqB,WAAYr
B,EAAoB,KAIlCN,EAAQ4B,SAAWtB,EAAoB,IACvCN,EAAQ6B,QAAUvB,EAAoB,IACtCN,EAAQ8B,UACNC,SAAUzB,EAAoB,IAC9B0B,SAAU1B,EAAoB,IAC9B2B,MAAO3B,EAAoB,IAC3B4B,MAAO5B,EAAoB,IAC3B6B,SAAU7B,EAAoB,IAE9B8B,YACEC,OACEC,KAAMhC,EAAoB,IAC1BiC,eAAgBjC,EAAoB,IACpCkC,QAASlC,EAAoB,IAC7BmC,UAAWnC,EAAoB,IAC/BoC,UAAWpC,EAAoB,KAGjCqC,UAAWrC,EAAoB,IAC/BsC,YAAatC,EAAoB,IACjCuC,WAAYvC,EAAoB,IAChCwC,SAAUxC,EAAoB,IAC9ByC,WAAYzC,EAAoB,IAChC0C,MAAO1C,EAAoB,IAC3B2C,gBAAiB3C,EAAoB,IACrC4C,QAAS5C,EAAoB,IAC7B6C,OAAQ7C,EAAoB,IAC5B8C,UAAW9C,EAAoB,IAC/B+C,SAAU/C,EAAoB,MAKlCN,EAAQsD,QAAUhD,EAAoB,IACtCN,EAAQuD,SACNC,KAAMlD,EAAoB,IAC1BmD,OAAQnD,EAAoB,IAC5BoD,OAAQpD,EAAoB,IAC5BqD,KAAMrD,EAAoB,IAC1BsD,MAAOtD,EAAoB,IAC3BuD,UAAWvD,EAAoB,IAC/BwD,YAAaxD,EAAoB,KAInCN,EAAQ+D,MAAQ,WACd,KAAM,IAAIC,OAAM,+EAIlBhE,EAAQiE,OAAS3D,EAAoB,IACrCN,EAAQkE,OAAS5D,EAAoB,IACrCN,EAAQmE,OAAS7D,EAAoB,KAKjC,SAASL,EAAQD,EAASM,GAM9B,GAAI2D,GAAS3D,EAAoB,GAOjCN,GAAQoE,SAAW,SAASC,GAC1B,MAAQA,aAAkBC,SAA2B,gBAAVD,IAQ7CrE,EAAQuE,SAAW,SAASF,GAC1B,MAAQA,aAAkBG,SAA2B,gBAAVH,IAQ7CrE,EAAQyE,OAAS,SAASJ,GACxB,GAAIA,YAAkBK,MACpB,OAAO,CAEJ,IAAI1E,EAAQuE,SAASF,GAAS,CAEjC,GAAIM,GAAQC,EAAaC,KAAKR,EAC9B,IAAIM,EACF,OAAO,CAEJ,KAAKG,MAAMJ,KAAKK,MAAMV,IACzB,OAAO,EAIX,OAAO,GAQTrE,EAAQgF,YAAc,SAASX,GAC7B,MAA4B,mBAAb,SACVY,OAAoB,eACpBA,OAAOC,cAAuB,WAC9Bb,YAAkBY,QAAOC,cAAcC,WAQ9CnF,EAAQoF,WAAa,WACnB,GAAIC,GAAK,WACP,MAAOC,MAAKC,MACQ,MAAhBD,KAAKE,UACPC,SAAS,IAGb,OACIJ,KAAOA,IAAO,IACVA,IAAO,IACPA,IAAO,IACPA,IAAO,IACPA,IAAOA,IAAOA,KAWxBrF,EAAQ0F,OAAS,SAAUC,GACzB,IAAK,GAAIC,GAAI,EAAGC,EAAMC,UAAUC,OAAYF,EAAJD,EAASA,IAAK,CACpD,GAAII,GAAQF,UAAUF,EACtB,KAAK,GAAIK,KAAQD,GACXA,EAAME,eAAeD,KACvBN,EAAEM,GAAQD,EAAMC,IAKtB,MAAON,IAWT3F,EAAQmG,gBAAkB,SAAUC,EAAOT,GACzC,IAAKU,MAAMC,QAAQF,GACjB,KAAM,IAAIpC,OAAM,uDAGlB,KAAK,GAAI4B,GAAI,EAAGA,EAAIE,UAAUC,OAAQH,IAGpC,IAAK,GAFDI,GAAQF,UAAUF,GAEb9E,EAAI,EAAGA,EAAIsF,EAAML,OAAQjF,IAAK,CACrC,GAAImF,GAAOG,EAAMtF,EACbkF,GAAME,eAAeD,KACvBN,EAAEM,GAAQD,EAAMC,IAItB,MAAON,IAWT3F,EAAQuG,oBAAsB,SAAUH,EAAOT,EAAGa,GAEhD,GAAIH,MAAMC,QAAQE,GAChB,KAAM,IAAIC,WAAU,yCAEtB,KAAK,GAAIb,GAAI,EAAGA,EAAIE,UAAUC,OAAQH,IAEpC,IAAK,GADDI,GAAQF,UAAUF,GACb9E,EAAI,EAAGA,EAAIsF,EAAML,OAAQjF,IAAK,CACrC,GAAImF,GAAOG,EAAMtF,EACjB,IAAIkF,EAAME,eAAeD,GACvB,GAAIO,EAAEP,IAASO,EAAEP,GAAMS,cAAgBC,OACrBC,SAAZjB,EAAEM,KACJN,EAAEM,OAEAN,EAAEM,GAAMS,cAAgBC,OAC1B3G,EAAQ6G,WAAWlB,EAAEM,GAAOO,EAAEP,IAG9BN,EAAEM,GAAQO,EAAEP,OAET,CAAA,GAAII,MAAMC,QAAQE,EAAEP,IACzB,KAAM,IAAIQ,WAAU,yCAEpBd,GAAEM,GAAQO,EAAEP,IAMpB,MAAON,IAWT3F,EAAQ8G,uBAAyB,SAAUV,EAAOT,EAAGa,GAEnD,GAAIH,MAAMC,QAAQE,GAChB,KAAM,IAAIC,WAAU,yCAEtB,KAAK,GAAIR,KAAQO,GACf,GAAIA,EAAEN,eAAeD,IACQ,IAAvBG,EAAMW,QAAQd,GAChB,GAAIO,EAAEP,IAASO,EAAEP,GAAMS,cAAgBC,OACrBC,SAAZjB,EAAEM,KACJN,EAAEM,OAEAN,EAAEM,GAAMS,cAAgBC,OAC1B3G,EAAQ6G,WAAWlB,EAAEM,GAAOO,EAAEP,IAG9BN,EAAEM,GAAQO,EAAEP,OAET,CAAA,GAAII,MAAMC,QAAQE,EAAEP,IACzB,KAAM,IAAIQ,WAAU,yCAEpBd,GAAEM,GAAQO,EAAEP,GAKpB,MAAON,IAST3F,EAAQ6G,WAAa,SAASlB,EAAGa,GAE/B,GAAIH,MAAMC,QAAQE,GAChB,KAAM,IAAIC,WAAU,yCAGtB,KAAK,GAAIR,KAAQO,GACf,GAAIA,EAAEN,eAAeD,GACnB,GAAIO,EAAEP,IAASO,EAAEP,GAAMS,cAAgBC,OACrBC,SAAZjB,EAAEM,KACJN,EAAEM,OAEAN,EAAEM,GAAMS,cAAgBC,OAC1B3G,EAAQ6G,WAAWlB,EAAEM,GAAOO,EAAEP,IAG9BN,EAAEM,GAAQO,EAAEP,OAET,CAAA,GAAII,MAAMC,QAAQE,EAAEP,IACzB,KAAM,IAAIQ,WAAU,yCAEpBd,GAAEM,GAAQO,EAAEP,GAIlB,MAAON,IAUT3F,EAAQgH,WAAa,SAAUrB,EAAGa,GAChC,GAAIb,EAAEI,QAAUS,EAAET,OAAQ,OAAO,CAEjC,KAAK,GAAIH,GAAI,EAAGC,EAAMF,EAAEI,OAAYF,EAAJD,EAASA,IACvC,GAAID,EAAEC,IAAMY,EAAEZ,GAAI,OAAO,CAG3B,QAAO,GAYT5F,EAAQiH,QAAU,SAAS5C,EAAQ6C,GACjC,GAAIvC,EAEJ,IAAeiC,SAAXvC,EACF,MAAOuC,OAET,IAAe,OAAXvC,EACF,MAAO,KAGT,KAAK6C,EACH,MAAO7C,EAET,IAAsB,g
BAAT6C,MAAwBA,YAAgB1C,SACnD,KAAM,IAAIR,OAAM,wBAIlB,QAAQkD,GACN,IAAK,UACL,IAAK,UACH,MAAOC,SAAQ9C,EAEjB,KAAK,SACL,IAAK,SACH,MAAOC,QAAOD,EAAO+C,UAEvB,KAAK,SACL,IAAK,SACH,MAAO5C,QAAOH,EAEhB,KAAK,OACH,GAAIrE,EAAQoE,SAASC,GACnB,MAAO,IAAIK,MAAKL,EAElB,IAAIA,YAAkBK,MACpB,MAAO,IAAIA,MAAKL,EAAO+C,UAEpB,IAAInD,EAAOoD,SAAShD,GACvB,MAAO,IAAIK,MAAKL,EAAO+C,UAEzB,IAAIpH,EAAQuE,SAASF,GAEnB,MADAM,GAAQC,EAAaC,KAAKR,GACtBM,EAEK,GAAID,MAAKJ,OAAOK,EAAM,KAGtBV,EAAOI,GAAQiD,QAIxB,MAAM,IAAItD,OACN,iCAAmChE,EAAQuH,QAAQlD,GAC/C,gBAGZ,KAAK,SACH,GAAIrE,EAAQoE,SAASC,GACnB,MAAOJ,GAAOI,EAEhB,IAAIA,YAAkBK,MACpB,MAAOT,GAAOI,EAAO+C,UAElB,IAAInD,EAAOoD,SAAShD,GACvB,MAAOJ,GAAOI,EAEhB,IAAIrE,EAAQuE,SAASF,GAEnB,MADAM,GAAQC,EAAaC,KAAKR,GAGjBJ,EAFLU,EAEYL,OAAOK,EAAM,IAGbN,EAIhB,MAAM,IAAIL,OACN,iCAAmChE,EAAQuH,QAAQlD,GAC/C,gBAGZ,KAAK,UACH,GAAIrE,EAAQoE,SAASC,GACnB,MAAO,IAAIK,MAAKL,EAEb,IAAIA,YAAkBK,MACzB,MAAOL,GAAOmD,aAEX,IAAIvD,EAAOoD,SAAShD,GACvB,MAAOA,GAAOiD,SAASE,aAEpB,IAAIxH,EAAQuE,SAASF,GAExB,MADAM,GAAQC,EAAaC,KAAKR,GACtBM,EAEK,GAAID,MAAKJ,OAAOK,EAAM,KAAK6C,cAG3B,GAAI9C,MAAKL,GAAQmD,aAI1B,MAAM,IAAIxD,OACN,iCAAmChE,EAAQuH,QAAQlD,GAC/C,mBAGZ,KAAK,UACH,GAAIrE,EAAQoE,SAASC,GACnB,MAAO,SAAWA,EAAS,IAExB,IAAIA,YAAkBK,MACzB,MAAO,SAAWL,EAAO+C,UAAY,IAElC,IAAIpH,EAAQuE,SAASF,GAAS,CACjCM,EAAQC,EAAaC,KAAKR,EAC1B,IAAIoD,EAQJ,OALEA,GAFE9C,EAEM,GAAID,MAAKJ,OAAOK,EAAM,KAAKyC,UAG3B,GAAI1C,MAAKL,GAAQ+C,UAEpB,SAAWK,EAAQ,KAG1B,KAAM,IAAIzD,OACN,iCAAmChE,EAAQuH,QAAQlD,GAC/C,mBAGZ,SACE,KAAM,IAAIL,OAAM,iBAAmBkD,EAAO,MAOhD,IAAItC,GAAe,qBAOnB5E,GAAQuH,QAAU,SAASlD,GACzB,GAAI6C,SAAc7C,EAElB,OAAY,UAAR6C,EACY,MAAV7C,EACK,OAELA,YAAkB8C,SACb,UAEL9C,YAAkBC,QACb,SAELD,YAAkBG,QACb,SAEL6B,MAAMC,QAAQjC,GACT,QAELA,YAAkBK,MACb,OAEF,SAEQ,UAARwC,EACA,SAEQ,WAARA,EACA,UAEQ,UAARA,EACA,SAGFA,GASTlH,EAAQ0H,gBAAkB,SAASC,GACjC,MAAOA,GAAKC,wBAAwBC,MAStC7H,EAAQ8H,eAAiB,SAASH,GAChC,MAAOA,GAAKC,wBAAwBG,KAQtC/H,EAAQgI,aAAe,SAASL,EAAMM,GACpC,GAAIC,GAAUP,EAAKM,UAAUE,MAAM,IACD,KAA9BD,EAAQnB,QAAQkB,KAClBC,EAAQE,KAAKH,GACbN,EAAKM,UAAYC,EAAQG,KAAK,OASlCrI,EAAQsI,gBAAkB,SAASX,EAAMM,GACvC,GAAIC,GAAUP,EAAKM,UAAUE,MAAM,KAC/BI,EAAQL,EAAQnB,QAAQkB,EACf,KAATM,IACFL,EAAQM,OAAOD,EAAO,GACtBZ,EAAKM,UAAYC,EAAQG,KAAK,OAalCrI,EAAQyI,QAAU,SAASpE,EAAQqE,GACjC,GAAI9C,GACAC,CACJ,IAAIQ,MAAMC,QAAQjC,GAEhB,IAAKuB,EAAI,EAAGC,EAAMxB,EAAO0B,OAAYF,EAAJD,EAASA,IACxC8C,EAASrE,EAAOuB,GAAIA,EAAGvB,OAKzB,KAAKuB,IAAKvB,GACJA,EAAO6B,eAAeN,IACxB8C,EAASrE,EAAOuB,GAAIA,EAAGvB,IAY/BrE,EAAQ2I,QAAU,SAAStE,GACzB,GAAIuE,KAEJ,KAAK,GAAI3C,KAAQ5B,GACXA,EAAO6B,eAAeD,IAAO2C,EAAMR,KAAK/D,EAAO4B,GAGrD,OAAO2C,IAUT5I,EAAQ6I,eAAiB,SAASxE,EAAQyE,EAAKrB,GAC7C,MAAIpD,GAAOyE,KAASrB,GAClBpD,EAAOyE,GAAOrB,GACP,IAGA,GAYXzH,EAAQ+I,iBAAmB,SAASC,EAASC,EAAQC,EAAUC,GACzDH,EAAQD,kBACSnC,SAAfuC,IACFA,GAAa,GAEA,eAAXF,GAA2BG,UAAUC,UAAUtC,QAAQ,YAAc,IACvEkC,EAAS,kBAGXD,EAAQD,iBAAiBE,EAAQC,EAAUC,IAE3CH,EAAQM,YAAY,KAAOL,EAAQC,IAWvClJ,EAAQuJ,oBAAsB,SAASP,EAASC,EAAQC,EAAUC,GAC5DH,EAAQO,qBAES3C,SAAfuC,IACFA,GAAa,GAEA,eAAXF,GAA2BG,UAAUC,UAAUtC,QAAQ,YAAc,IACvEkC,EAAS,kBAGXD,EAAQO,oBAAoBN,EAAQC,EAAUC,IAG9CH,EAAQQ,YAAY,KAAOP,EAAQC,IAOvClJ,EAAQyJ,eAAiB,SAAUC,GAC5BA,IACHA,EAAQC,OAAOD,OAEbA,EAAMD,eACRC,EAAMD,iBAGNC,EAAME,aAAc,GASxB5J,EAAQ6J,UAAY,SAASH,GAEtBA,IACHA,EAAQC,OAAOD,MAGjB,IAAII,EAcJ,OAZIJ,GAAMI,OACRA,EAASJ,EAAMI,OAERJ,EAAMK,aACbD,EAASJ,EAAMK,YAGMnD,QAAnBkD,EAAOE,UAA4C,GAAnBF,EAAOE,WAEzCF,EAASA,EAAOG,YAGXH,GAGT9J,EAAQkK,UAQRlK,EAAQkK,OAAOC,UAAY,SAAU1C,EAAO2C,GAK1C,MAJoB,kBAAT3C,KACTA,EAAQA,KAGG,MAATA,EACe,GAATA,EAGH2C,GAAgB,MASzBpK,EAAQkK,OAAOG,SAAW,SAAU5C,EAAO2C,GAKzC,MAJoB,kBAAT3C,KACTA,EAAQA,KAGG,MAATA,EACKnD,OAAOmD,IAAU2C,GAAgB,KAGnCA,GAAgB,MASzBpK,E
AAQkK,OAAOI,SAAW,SAAU7C,EAAO2C,GAKzC,MAJoB,kBAAT3C,KACTA,EAAQA,KAGG,MAATA,EACKjD,OAAOiD,GAGT2C,GAAgB,MASzBpK,EAAQkK,OAAOK,OAAS,SAAU9C,EAAO2C,GAKvC,MAJoB,kBAAT3C,KACTA,EAAQA,KAGNzH,EAAQuE,SAASkD,GACZA,EAEAzH,EAAQoE,SAASqD,GACjBA,EAAQ,KAGR2C,GAAgB,MAU3BpK,EAAQkK,OAAOM,UAAY,SAAU/C,EAAO2C,GAK1C,MAJoB,kBAAT3C,KACTA,EAAQA,KAGHA,GAAS2C,GAAgB,MASlCpK,EAAQyK,SAAW,SAASC,GAE1B,GAAIC,GAAiB,kCACrBD,GAAMA,EAAIE,QAAQD,EAAgB,SAAS/J,EAAGiK,EAAGC,EAAGtE,GAChD,MAAOqE,GAAIA,EAAIC,EAAIA,EAAItE,EAAIA,GAE/B,IAAIuE,GAAS,4CAA4ClG,KAAK6F,EAC9D,OAAOK,IACHF,EAAGG,SAASD,EAAO,GAAI,IACvBD,EAAGE,SAASD,EAAO,GAAI,IACvBvE,EAAGwE,SAASD,EAAO,GAAI,KACvB,MAWN/K,EAAQiL,SAAW,SAASC,EAAIC,EAAMC,GACpC,MAAO,MAAQ,GAAK,KAAOF,GAAO,KAAOC,GAAS,GAAKC,GAAM3F,SAAS,IAAI4F,MAAM,IASlFrL,EAAQsL,WAAa,SAASC,GAC5B,GAAI1K,EACJ,IAAIb,EAAQuE,SAASgH,GAAQ,CAC3B,GAAIvL,EAAQwL,WAAWD,GAAQ,CAC7B,GAAIE,GAAMF,EAAMG,OAAO,GAAGA,OAAO,EAAEH,EAAMxF,OAAO,GAAGoC,MAAM,IACzDoD,GAAQvL,EAAQiL,SAASQ,EAAI,GAAGA,EAAI,GAAGA,EAAI,IAE7C,GAAIzL,EAAQ2L,WAAWJ,GAAQ,CAC7B,GAAIK,GAAM5L,EAAQ6L,SAASN,GACvBO,GAAmBC,EAAEH,EAAIG,EAAEC,EAAU,IAARJ,EAAII,EAASC,EAAE3G,KAAK4G,IAAI,EAAU,KAARN,EAAIK,IAC3DE,GAAmBJ,EAAEH,EAAIG,EAAEC,EAAE1G,KAAK4G,IAAI,EAAU,KAARN,EAAIK,GAAUA,EAAQ,GAANL,EAAIK,GAC5DG,EAAkBpM,EAAQqM,SAASF,EAAeJ,EAAGI,EAAeJ,EAAGI,EAAeF,GACtFK,EAAkBtM,EAAQqM,SAASP,EAAgBC,EAAED,EAAgBE,EAAEF,EAAgBG,EAE3FpL,IACE0L,WAAYhB,EACZiB,OAAOJ,EACPK,WACEF,WAAWD,EACXE,OAAOJ,GAETM,OACEH,WAAWD,EACXE,OAAOJ,QAKXvL,IACE0L,WAAWhB,EACXiB,OAAOjB,EACPkB,WACEF,WAAWhB,EACXiB,OAAOjB,GAETmB,OACEH,WAAWhB,EACXiB,OAAOjB,QAMb1K,MACAA,EAAE0L,WAAahB,EAAMgB,YAAc,QACnC1L,EAAE2L,OAASjB,EAAMiB,QAAU3L,EAAE0L,WAEzBvM,EAAQuE,SAASgH,EAAMkB,WACzB5L,EAAE4L,WACAD,OAAQjB,EAAMkB,UACdF,WAAYhB,EAAMkB,YAIpB5L,EAAE4L,aACF5L,EAAE4L,UAAUF,WAAahB,EAAMkB,WAAalB,EAAMkB,UAAUF,YAAc1L,EAAE0L,WAC5E1L,EAAE4L,UAAUD,OAASjB,EAAMkB,WAAalB,EAAMkB,UAAUD,QAAU3L,EAAE2L,QAGlExM,EAAQuE,SAASgH,EAAMmB,OACzB7L,EAAE6L,OACAF,OAAQjB,EAAMmB,MACdH,WAAYhB,EAAMmB,QAIpB7L,EAAE6L,SACF7L,EAAE6L,MAAMH,WAAahB,EAAMmB,OAASnB,EAAMmB,MAAMH,YAAc1L,EAAE0L,WAChE1L,EAAE6L,MAAMF,OAASjB,EAAMmB,OAASnB,EAAMmB,MAAMF,QAAU3L,EAAE2L,OAI5D,OAAO3L,IAYTb,EAAQ2M,SAAW,SAASzB,EAAIC,EAAMC,GACpCF,GAAQ,IAAKC,GAAY,IAAKC,GAAU,GACxC,IAAIwB,GAAStH,KAAK4G,IAAIhB,EAAI5F,KAAK4G,IAAIf,EAAMC,IACrCyB,EAASvH,KAAKwH,IAAI5B,EAAI5F,KAAKwH,IAAI3B,EAAMC,GAGzC,IAAIwB,GAAUC,EACZ,OAAQd,EAAE,EAAEC,EAAE,EAAEC,EAAEW,EAIpB,IAAIG,GAAK7B,GAAK0B,EAAUzB,EAAMC,EAASA,GAAMwB,EAAU1B,EAAIC,EAAQC,EAAKF,EACpEa,EAAKb,GAAK0B,EAAU,EAAMxB,GAAMwB,EAAU,EAAI,EAC9CI,EAAM,IAAIjB,EAAIgB,GAAGF,EAASD,IAAS,IACnCK,GAAcJ,EAASD,GAAQC,EAC/BpF,EAAQoF,CACZ,QAAQd,EAAEiB,EAAIhB,EAAEiB,EAAWhB,EAAExE,GAG/B,IAAIyF,IAEF/E,MAAO,SAAUgF,GACf,GAAIC,KAWJ,OATAD,GAAQhF,MAAM,KAAKM,QAAQ,SAAU4E,GACnC,GAAoB,IAAhBA,EAAMC,OAAc,CACtB,GAAIC,GAAQF,EAAMlF,MAAM,KACpBW,EAAMyE,EAAM,GAAGD,OACf7F,EAAQ8F,EAAM,GAAGD,MACrBF,GAAOtE,GAAOrB,KAIX2F,GAIT/E,KAAM,SAAU+E,GACd,MAAOzG,QAAO6G,KAAKJ,GACdK,IAAI,SAAU3E,GACb,MAAOA,GAAM,KAAOsE,EAAOtE,KAE5BT,KAAK,OASdrI,GAAQ0N,WAAa,SAAU1E,EAASmE,GACtC,GAAIQ,GAAgBT,EAAQ/E,MAAMa,EAAQqE,MAAMF,SAC5CS,EAAYV,EAAQ/E,MAAMgF,GAC1BC,EAASpN,EAAQ0F,OAAOiI,EAAeC,EAE3C5E,GAAQqE,MAAMF,QAAUD,EAAQ7E,KAAK+E,IAQvCpN,EAAQ6N,cAAgB,SAAU7E,EAASmE,GACzC,GAAIC,GAASF,EAAQ/E,MAAMa,EAAQqE,MAAMF,SACrCW,EAAeZ,EAAQ/E,MAAMgF,EAEjC,KAAK,GAAIrE,KAAOgF,GACVA,EAAa5H,eAAe4C,UACvBsE,GAAOtE,EAIlBE,GAAQqE,MAAMF,QAAUD,EAAQ7E,KAAK+E,IAWvCpN,EAAQ+N,SAAW,SAAShC,EAAGC,EAAGC,GAChC,GAAIpB,GAAGC,EAAGtE,EAENZ,EAAIN,KAAKC,MAAU,EAAJwG,GACfiC,EAAQ,EAAJjC,EAAQnG,EACZ9E,EAAImL,GAAK,EAAID,GACbiC,EAAIhC,GAAK,EAAI+B,EAAIhC,GACjBkC,EAAIjC,GAAK,GAAK,EAAI+B,GAAKhC,EAE3B,QAAQpG,EAAI,GACV,IAAK,GAAGiF,EA
AIoB,EAAGnB,EAAIoD,EAAG1H,EAAI1F,CAAG,MAC7B,KAAK,GAAG+J,EAAIoD,EAAGnD,EAAImB,EAAGzF,EAAI1F,CAAG,MAC7B,KAAK,GAAG+J,EAAI/J,EAAGgK,EAAImB,EAAGzF,EAAI0H,CAAG,MAC7B,KAAK,GAAGrD,EAAI/J,EAAGgK,EAAImD,EAAGzH,EAAIyF,CAAG,MAC7B,KAAK,GAAGpB,EAAIqD,EAAGpD,EAAIhK,EAAG0F,EAAIyF,CAAG,MAC7B,KAAK,GAAGpB,EAAIoB,EAAGnB,EAAIhK,EAAG0F,EAAIyH,EAG5B,OAAQpD,EAAEvF,KAAKC,MAAU,IAAJsF,GAAUC,EAAExF,KAAKC,MAAU,IAAJuF,GAAUtE,EAAElB,KAAKC,MAAU,IAAJiB,KAGrExG,EAAQqM,SAAW,SAASN,EAAGC,EAAGC,GAChC,GAAIR,GAAMzL,EAAQ+N,SAAShC,EAAGC,EAAGC,EACjC,OAAOjM,GAAQiL,SAASQ,EAAIZ,EAAGY,EAAIX,EAAGW,EAAIjF,IAG5CxG,EAAQ6L,SAAW,SAASnB,GAC1B,GAAIe,GAAMzL,EAAQyK,SAASC,EAC3B,OAAO1K,GAAQ2M,SAASlB,EAAIZ,EAAGY,EAAIX,EAAGW,EAAIjF,IAG5CxG,EAAQ2L,WAAa,SAASjB,GAC5B,GAAIyD,GAAO,qCAAqCC,KAAK1D,EACrD,OAAOyD,IAGTnO,EAAQwL,WAAa,SAASC,GAC5BA,EAAMA,EAAIb,QAAQ,IAAI,GACtB,IAAIuD,GAAO,wCAAwCC,KAAK3C,EACxD,OAAO0C,IAUTnO,EAAQqO,sBAAwB,SAASC,EAAQC,GAC/C,GAA8B,gBAAnBA,GAA6B,CAEtC,IAAK,GADDC,GAAW7H,OAAO8H,OAAOF,GACpB3I,EAAI,EAAGA,EAAI0I,EAAOvI,OAAQH,IAC7B2I,EAAgBrI,eAAeoI,EAAO1I,KACC,gBAA9B2I,GAAgBD,EAAO1I,MAChC4I,EAASF,EAAO1I,IAAM5F,EAAQ0O,aAAaH,EAAgBD,EAAO1I,KAIxE,OAAO4I,GAGP,MAAO,OAWXxO,EAAQ0O,aAAe,SAASH,GAC9B,GAA8B,gBAAnBA,GAA6B,CACtC,GAAIC,GAAW7H,OAAO8H,OAAOF,EAC7B,KAAK,GAAI3I,KAAK2I,GACRA,EAAgBrI,eAAeN,IACA,gBAAtB2I,GAAgB3I,KACzB4I,EAAS5I,GAAK5F,EAAQ0O,aAAaH,EAAgB3I,IAIzD,OAAO4I,GAGP,MAAO,OAcXxO,EAAQ2O,aAAe,SAAUC,EAAaC,EAAS3E,GACrD,GAAwBtD,SAApBiI,EAAQ3E,GACV,GAA8B,iBAAnB2E,GAAQ3E,GACjB0E,EAAY1E,GAAQ4E,QAAUD,EAAQ3E,OAEnC,CACH0E,EAAY1E,GAAQ4E,SAAU,CAC9B,KAAK,GAAI7I,KAAQ4I,GAAQ3E,GACnB2E,EAAQ3E,GAAQhE,eAAeD,KACjC2I,EAAY1E,GAAQjE,GAAQ4I,EAAQ3E,GAAQjE,MAmBtDjG,EAAQ+O,mBAAqB,SAASC,EAAcC,EAAgBC,EAAOC,GAMzE,IALA,GAAIC,GAAgB,IAChBC,EAAY,EACZC,EAAM,EACNC,EAAOP,EAAajJ,OAAS,EAEnBwJ,GAAPD,GAA2BF,EAAZC,GAA2B,CAC/C,GAAIG,GAASlK,KAAKC,OAAO+J,EAAMC,GAAQ,GAEnCE,EAAOT,EAAaQ,GACpB/H,EAAoBb,SAAXuI,EAAwBM,EAAKP,GAASO,EAAKP,GAAOC,GAE3DO,EAAeT,EAAexH,EAClC,IAAoB,GAAhBiI,EACF,MAAOF,EAEgB,KAAhBE,EACPJ,EAAME,EAAS,EAGfD,EAAOC,EAAS,EAGlBH,IAGF,MAAO,IAeTrP,EAAQ2P,kBAAoB,SAASX,EAAclF,EAAQoF,EAAOU,GAOhE,IANA,GAIIC,GAAWpI,EAAOqI,EAAWN,EAJ7BJ,EAAgB,IAChBC,EAAY,EACZC,EAAM,EACNC,EAAOP,EAAajJ,OAAS,EAGnBwJ,GAAPD,GAA2BF,EAAZC,GAA2B,CAO/C,GALAG,EAASlK,KAAKC,MAAM,IAAKgK,EAAKD,IAC9BO,EAAYb,EAAa1J,KAAKwH,IAAI,EAAE0C,EAAS,IAAIN,GACjDzH,EAAYuH,EAAaQ,GAAQN,GACjCY,EAAYd,EAAa1J,KAAK4G,IAAI8C,EAAajJ,OAAO,EAAEyJ,EAAS,IAAIN,GAEjEzH,GAASqC,EACX,MAAO0F,EAEJ,IAAgB1F,EAAZ+F,GAAsBpI,EAAQqC,EACrC,MAAyB,UAAlB8F,EAA6BtK,KAAKwH,IAAI,EAAE0C,EAAS,GAAKA,CAE1D,IAAY1F,EAARrC,GAAkBqI,EAAYhG,EACrC,MAAyB,UAAlB8F,EAA6BJ,EAASlK,KAAK4G,IAAI8C,EAAajJ,OAAO,EAAEyJ,EAAS,EAGzE1F,GAARrC,EACF6H,EAAME,EAAS,EAGfD,EAAOC,EAAS,EAGpBH,IAIF,MAAO,IAYTrP,EAAQ+P,cAAgB,SAAU7B,EAAG8B,EAAOC,EAAKC,GAC/C,GAAIC,GAASF,EAAMD,CAEnB,OADA9B,IAAKgC,EAAS,EACN,EAAJhC,EAAciC,EAAO,EAAEjC,EAAEA,EAAI8B,GACjC9B,KACQiC,EAAO,GAAKjC,GAAGA,EAAE,GAAK,GAAK8B,IAUrChQ,EAAQoQ,iBAENC,OAAQ,SAAUnC,GAChB,MAAOA,IAGToC,WAAY,SAAUpC,GACpB,MAAOA,GAAIA,GAGbqC,YAAa,SAAUrC,GACrB,MAAOA,IAAK,EAAIA,IAGlB6B,cAAe,SAAU7B,GACvB,MAAW,GAAJA,EAAS,EAAIA,EAAIA,EAAI,IAAM,EAAI,EAAIA,GAAKA,GAGjDsC,YAAa,SAAUtC,GACrB,MAAOA,GAAIA,EAAIA,GAGjBuC,aAAc,SAAUvC,GACtB,QAAUA,EAAKA,EAAIA,EAAI,GAGzBwC,eAAgB,SAAUxC,GACxB,MAAW,GAAJA,EAAS,EAAIA,EAAIA,EAAIA,GAAKA,EAAI,IAAM,EAAIA,EAAI,IAAM,EAAIA,EAAI,GAAK,GAGxEyC,YAAa,SAAUzC,GACrB,MAAOA,GAAIA,EAAIA,EAAIA,GAGrB0C,aAAc,SAAU1C,GACtB,MAAO,MAAOA,EAAKA,EAAIA,EAAIA,GAG7B2C,eAAgB,SAAU3C,GACxB,MAAW,GAAJA,EAAS,EAAIA,EAAIA,EAAIA,EAAIA,EAAI,EAAI,IAAOA,EAAKA,EAAIA,EAAIA,GAG9D4C,YAAa,SAAU5C,GACrB,MAAOA,GAAIA,EAAIA,EAAIA,EAAIA,GAGzB6C,aAAc,SAAU7C,GACtB,MAAO,KAAOA,EAAKA,EAAIA,EAAIA,
EAAIA,GAGjC8C,eAAgB,SAAU9C,GACxB,MAAW,GAAJA,EAAS,GAAKA,EAAIA,EAAIA,EAAIA,EAAIA,EAAI,EAAI,KAAQA,EAAKA,EAAIA,EAAIA,EAAIA,KAMtE,SAASjO,EAAQD,GASrBA,EAAQiR,gBAAkB,SAASC,GAEjC,IAAK,GAAIC,KAAeD,GAClBA,EAAchL,eAAeiL,KAC/BD,EAAcC,GAAaC,UAAYF,EAAcC,GAAaE,KAClEH,EAAcC,GAAaE,UAYjCrR,EAAQsR,gBAAkB,SAASJ,GAEjC,IAAK,GAAIC,KAAeD,GACtB,GAAIA,EAAchL,eAAeiL,IAC3BD,EAAcC,GAAaC,UAAW,CACxC,IAAK,GAAIxL,GAAI,EAAGA,EAAIsL,EAAcC,GAAaC,UAAUrL,OAAQH,IAC/DsL,EAAcC,GAAaC,UAAUxL,GAAGqE,WAAWsH,YAAYL,EAAcC,GAAaC,UAAUxL,GAEtGsL,GAAcC,GAAaC,eAgBnCpR,EAAQwR,cAAgB,SAAUL,EAAaD,EAAeO,GAC5D,GAAIzI,EAqBJ,OAnBIkI,GAAchL,eAAeiL,GAE3BD,EAAcC,GAAaC,UAAUrL,OAAS,GAChDiD,EAAUkI,EAAcC,GAAaC,UAAU,GAC/CF,EAAcC,GAAaC,UAAUM,UAIrC1I,EAAU2I,SAASC,gBAAgB,6BAA8BT,GACjEM,EAAaI,YAAY7I,KAK3BA,EAAU2I,SAASC,gBAAgB,6BAA8BT,GACjED,EAAcC,IAAgBE,QAAUD,cACxCK,EAAaI,YAAY7I,IAE3BkI,EAAcC,GAAaE,KAAKjJ,KAAKY,GAC9BA,GAcThJ,EAAQ8R,cAAgB,SAAUX,EAAaD,EAAea,EAAcC,GAC1E,GAAIhJ,EA+BJ,OA7BIkI,GAAchL,eAAeiL,GAE3BD,EAAcC,GAAaC,UAAUrL,OAAS,GAChDiD,EAAUkI,EAAcC,GAAaC,UAAU,GAC/CF,EAAcC,GAAaC,UAAUM,UAIrC1I,EAAU2I,SAASM,cAAcd,GACZvK,SAAjBoL,EACFD,EAAaC,aAAahJ,EAASgJ,GAGnCD,EAAaF,YAAY7I,KAM7BA,EAAU2I,SAASM,cAAcd,GACjCD,EAAcC,IAAgBE,QAAUD,cACnBxK,SAAjBoL,EACFD,EAAaC,aAAahJ,EAASgJ,GAGnCD,EAAaF,YAAY7I,IAG7BkI,EAAcC,GAAaE,KAAKjJ,KAAKY,GAC9BA,GAkBThJ,EAAQkS,UAAY,SAASC,EAAGC,EAAGC,EAAOnB,EAAeO,GACvD,GAAIa,EAmBJ,OAlBsC,UAAlCD,EAAMxD,QAAQ0D,WAAWlF,OAC3BiF,EAAQtS,EAAQwR,cAAc,SAASN,EAAcO,GACrDa,EAAME,eAAe,KAAM,KAAML,GACjCG,EAAME,eAAe,KAAM,KAAMJ,GACjCE,EAAME,eAAe,KAAM,IAAK,GAAMH,EAAMxD,QAAQ0D,WAAWE,QAG/DH,EAAQtS,EAAQwR,cAAc,OAAON,EAAcO,GACnDa,EAAME,eAAe,KAAM,IAAKL,EAAI,GAAIE,EAAMxD,QAAQ0D,WAAWE,MACjEH,EAAME,eAAe,KAAM,IAAKJ,EAAI,GAAIC,EAAMxD,QAAQ0D,WAAWE,MACjEH,EAAME,eAAe,KAAM,QAASH,EAAMxD,QAAQ0D,WAAWE,MAC7DH,EAAME,eAAe,KAAM,SAAUH,EAAMxD,QAAQ0D,WAAWE,OAGzB7L,SAApCyL,EAAMxD,QAAQ0D,WAAWnF,QAC1BkF,EAAME,eAAe,KAAM,QAASH,EAAMA,MAAMxD,QAAQ0D,WAAWnF,QAErEkF,EAAME,eAAe,KAAM,QAASH,EAAMpK,UAAY,UAC/CqK,GAUTtS,EAAQ0S,QAAU,SAAUP,EAAGC,EAAGO,EAAOC,EAAQ3K,EAAWiJ,EAAeO,GACzE,GAAc,GAAVmB,EAAa,CACF,EAATA,IACFA,GAAU,GACVR,GAAKQ,EAEP,IAAIC,GAAO7S,EAAQwR,cAAc,OAAON,EAAeO,EACvDoB,GAAKL,eAAe,KAAM,IAAKL,EAAI,GAAMQ,GACzCE,EAAKL,eAAe,KAAM,IAAKJ,GAC/BS,EAAKL,eAAe,KAAM,QAASG,GACnCE,EAAKL,eAAe,KAAM,SAAUI,GACpCC,EAAKL,eAAe,KAAM,QAASvK,MAMnC,SAAShI,EAAQD,EAASM,GAgD9B,QAASW,GAAS6R,EAAMjE,GActB,IAZIiE,GAASzM,MAAMC,QAAQwM,IAAU/R,EAAKiE,YAAY8N,KACpDjE,EAAUiE,EACVA,EAAO,MAGT1S,KAAK2S,SAAWlE,MAChBzO,KAAK4S,SACL5S,KAAK6S,SAAW7S,KAAK2S,SAASG,SAAW,KACzC9S,KAAK+S,SAID/S,KAAK2S,SAAS7L,KAChB,IAAK,GAAIgI,KAAS9O,MAAK2S,SAAS7L,KAC9B,GAAI9G,KAAK2S,SAAS7L,KAAKhB,eAAegJ,GAAQ,CAC5C,GAAIzH,GAAQrH,KAAK2S,SAAS7L,KAAKgI,EAE7B9O,MAAK+S,MAAMjE,GADA,QAATzH,GAA4B,WAATA,GAA+B,WAATA,EACvB,OAGAA,EAO5B,GAAIrH,KAAK2S,SAAS9L,QAChB,KAAM,IAAIjD,OAAM,sDAGlB5D,MAAKgT,gBAGDN,GACF1S,KAAKiT,IAAIP,GAGX1S,KAAKkT,WAAWzE,GAtFlB,GAAI9N,GAAOT,EAAoB,GAC3Ba,EAAQb,EAAoB,EAiGhCW,GAAQsS,UAAUD,WAAa,SAASzE,GAClCA,GAA6BjI,SAAlBiI,EAAQ2E,QACjB3E,EAAQ2E,SAAU,EAEhBpT,KAAKqT,SACPrT,KAAKqT,OAAOC,gBACLtT,MAAKqT,SAKTrT,KAAKqT,SACRrT,KAAKqT,OAAStS,EAAMuE,OAAOtF,MACzBwK,SAAU,MAAO,SAAU,aAIF,gBAAlBiE,GAAQ2E,OACjBpT,KAAKqT,OAAOH,WAAWzE,EAAQ2E,UAevCvS,EAAQsS,UAAUI,GAAK,SAASjK,EAAOhB,GACrC,GAAIkL,GAAcxT,KAAKgT,aAAa1J,EAC/BkK,KACHA,KACAxT,KAAKgT,aAAa1J,GAASkK,GAG7BA,EAAYxL,MACVM,SAAUA,KAKdzH,EAAQsS,UAAUM,UAAY5S,EAAQsS,UAAUI,GAOhD1S,EAAQsS,UAAUO,IAAM,SAASpK,EAAOhB,GACtC,GAAIkL,GAAcxT,KAAKgT,aAAa1J,EAChCkK,KACFxT,KAAKgT,aAAa1J,GAASkK,EAAYG,OAAO,SAAU7K,GACtD,MAAQA,GAASR,UAAYA,MAMnCzH,EAAQsS,UAAUS,YAAc/S,EAAQsS,UAAUO,IASlD7S,EAAQsS,UAAUU,SAAW,SAAUvK,EAAOwK,EAAQC,GACpD,GAAa,KAATzK,EACF,KAA
M,IAAI1F,OAAM,yBAGlB,IAAI4P,KACAlK,KAAStJ,MAAKgT,eAChBQ,EAAcA,EAAYQ,OAAOhU,KAAKgT,aAAa1J,KAEjD,KAAOtJ,MAAKgT,eACdQ,EAAcA,EAAYQ,OAAOhU,KAAKgT,aAAa,MAGrD,KAAK,GAAIxN,GAAI,EAAGA,EAAIgO,EAAY7N,OAAQH,IAAK,CAC3C,GAAIyO,GAAaT,EAAYhO,EACzByO,GAAW3L,UACb2L,EAAW3L,SAASgB,EAAOwK,EAAQC,GAAY,QAYrDlT,EAAQsS,UAAUF,IAAM,SAAUP,EAAMqB,GACtC,GACI1T,GADA6T,KAEAC,EAAKnU,IAET,IAAIiG,MAAMC,QAAQwM,GAEhB,IAAK,GAAIlN,GAAI,EAAGC,EAAMiN,EAAK/M,OAAYF,EAAJD,EAASA,IAC1CnF,EAAK8T,EAAGC,SAAS1B,EAAKlN,IACtB0O,EAASlM,KAAK3H,OAGb,IAAIM,EAAKiE,YAAY8N,GAGxB,IAAK,GADD2B,GAAUrU,KAAKsU,gBAAgB5B,GAC1B6B,EAAM,EAAGC,EAAO9B,EAAK+B,kBAAyBD,EAAND,EAAYA,IAAO,CAElE,IAAK,GADDlF,MACKqF,EAAM,EAAGC,EAAON,EAAQ1O,OAAcgP,EAAND,EAAYA,IAAO,CAC1D,GAAI5F,GAAQuF,EAAQK,EACpBrF,GAAKP,GAAS4D,EAAKkC,SAASL,EAAKG,GAGnCrU,EAAK8T,EAAGC,SAAS/E,GACjB6E,EAASlM,KAAK3H,OAGb,CAAA,KAAIqS,YAAgBnM,SAMvB,KAAM,IAAI3C,OAAM,mBAJhBvD,GAAK8T,EAAGC,SAAS1B,GACjBwB,EAASlM,KAAK3H,GAUhB,MAJI6T,GAASvO,QACX3F,KAAK6T,SAAS,OAAQ5R,MAAOiS,GAAWH,GAGnCG,GASTrT,EAAQsS,UAAU0B,OAAS,SAAUnC,EAAMqB,GACzC,GAAIG,MACAY,KACAC,KACAZ,EAAKnU,KACL8S,EAAUqB,EAAGtB,SAEbmC,EAAc,SAAU3F,GAC1B,GAAIhP,GAAKgP,EAAKyD,EACVqB,GAAGvB,MAAMvS,IAEXA,EAAK8T,EAAGc,YAAY5F,GACpByF,EAAW9M,KAAK3H,GAChB0U,EAAY/M,KAAKqH,KAIjBhP,EAAK8T,EAAGC,SAAS/E,GACjB6E,EAASlM,KAAK3H,IAIlB,IAAI4F,MAAMC,QAAQwM,GAEhB,IAAK,GAAIlN,GAAI,EAAGC,EAAMiN,EAAK/M,OAAYF,EAAJD,EAASA,IAC1CwP,EAAYtC,EAAKlN,QAGhB,IAAI7E,EAAKiE,YAAY8N,GAGxB,IAAK,GADD2B,GAAUrU,KAAKsU,gBAAgB5B,GAC1B6B,EAAM,EAAGC,EAAO9B,EAAK+B,kBAAyBD,EAAND,EAAYA,IAAO,CAElE,IAAK,GADDlF,MACKqF,EAAM,EAAGC,EAAON,EAAQ1O,OAAcgP,EAAND,EAAYA,IAAO,CAC1D,GAAI5F,GAAQuF,EAAQK,EACpBrF,GAAKP,GAAS4D,EAAKkC,SAASL,EAAKG,GAGnCM,EAAY3F,OAGX,CAAA,KAAIqD,YAAgBnM,SAKvB,KAAM,IAAI3C,OAAM,mBAHhBoR,GAAYtC,GAad,MAPIwB,GAASvO,QACX3F,KAAK6T,SAAS,OAAQ5R,MAAOiS,GAAWH,GAEtCe,EAAWnP,QACb3F,KAAK6T,SAAS,UAAW5R,MAAO6S,EAAYpC,KAAMqC,GAAchB,GAG3DG,EAASF,OAAOc,IAsCzBjU,EAAQsS,UAAU+B,IAAM,WACtB,GAGI7U,GAAI8U,EAAK1G,EAASiE,EAHlByB,EAAKnU,KAILoV,EAAYzU,EAAKwG,QAAQzB,UAAU,GACtB,WAAb0P,GAAsC,UAAbA,GAE3B/U,EAAKqF,UAAU,GACf+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,IAEG,SAAb0P,GAEPD,EAAMzP,UAAU,GAChB+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,KAIjB+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,GAInB,IAAI2P,EACJ,IAAI5G,GAAWA,EAAQ4G,WAAY,CACjC,GAAIC,IAAiB,YAAa,QAAS,SAG3C,IAFAD,EAA0D,IAA7CC,EAAc3O,QAAQ8H,EAAQ4G,YAAoB,QAAU5G,EAAQ4G,WAE7E3C,GAAS2C,GAAc1U,EAAKwG,QAAQuL,GACtC,KAAM,IAAI9O,OAAM,6BAA+BjD,EAAKwG,QAAQuL,GAAQ,sDACVjE,EAAQ3H,KAAO,IAE3E,IAAkB,aAAduO,IAA8B1U,EAAKiE,YAAY8N,GACjD,KAAM,IAAI9O,OAAM,6EAKlByR,GADO3C,GAC6B,aAAtB/R,EAAKwG,QAAQuL,GAAwB,YAGtC,OAIf,IAEgBrD,GAAMkG,EAAQ/P,EAAGC,EAF7BqB,EAAO2H,GAAWA,EAAQ3H,MAAQ9G,KAAK2S,SAAS7L,KAChD6M,EAASlF,GAAWA,EAAQkF,OAC5B1R,IAGJ,IAAUuE,QAANnG,EAEFgP,EAAO8E,EAAGqB,SAASnV,EAAIyG,GACnB6M,IAAWA,EAAOtE,KACpBA,EAAO,UAGN,IAAW7I,QAAP2O,EAEP,IAAK3P,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrC6J,EAAO8E,EAAGqB,SAASL,EAAI3P,GAAIsB,KACtB6M,GAAUA,EAAOtE,KACpBpN,EAAM+F,KAAKqH,OAMf,KAAKkG,IAAUvV,MAAK4S,MACd5S,KAAK4S,MAAM9M,eAAeyP,KAC5BlG,EAAO8E,EAAGqB,SAASD,EAAQzO,KACtB6M,GAAUA,EAAOtE,KACpBpN,EAAM+F,KAAKqH,GAYnB,IALIZ,GAAWA,EAAQgH,OAAejP,QAANnG,GAC9BL,KAAK0V,MAAMzT,EAAOwM,EAAQgH,OAIxBhH,GAAWA,EAAQP,OAAQ,CAC7B,GAAIA,GAASO,EAAQP,MACrB,IAAU1H,QAANnG,EACFgP,EAAOrP,KAAK2V,cAActG,EAAMnB,OAGhC,KAAK1I,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IACvCvD,EAAMuD,GAAKxF,KAAK2V,cAAc1T,EAAMuD,GAAI0I,GAM9C,GAAkB,aAAdmH,EAA2B,CAC7B,GAAIhB,GAAUrU,KAAKsU,gBAAgB5B,EACnC,IAAUlM,QAANnG,EAEF8T,EAAGyB,WAAWlD,EAAM2B,EAAShF,OAI7B,KAAK7J,EAAI,EAAGA,EAAIvD,EAAM0D,OAAQH,IAC5B2O,EAAGyB,WAAWlD,EAAM2B,EAASpS,EAAMuD,GAGvC,OAAOkN,GAEJ,GAAkB,UAAd2C,EAAwB,CAC/B,GAAI1K
,KACJ,KAAKnF,EAAI,EAAGA,EAAIvD,EAAM0D,OAAQH,IAC5BmF,EAAO1I,EAAMuD,GAAGnF,IAAM4B,EAAMuD,EAE9B,OAAOmF,GAIP,GAAUnE,QAANnG,EAEF,MAAOgP,EAIP,IAAIqD,EAAM,CAER,IAAKlN,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IACvCkN,EAAK1K,KAAK/F,EAAMuD,GAElB,OAAOkN,GAIP,MAAOzQ,IAcfpB,EAAQsS,UAAU0C,OAAS,SAAUpH,GACnC,GAIIjJ,GACAC,EACApF,EACAgP,EACApN,EARAyQ,EAAO1S,KAAK4S,MACZe,EAASlF,GAAWA,EAAQkF,OAC5B8B,EAAQhH,GAAWA,EAAQgH,MAC3B3O,EAAO2H,GAAWA,EAAQ3H,MAAQ9G,KAAK2S,SAAS7L,KAMhDqO,IAEJ,IAAIxB,EAEF,GAAI8B,EAAO,CAETxT,IACA,KAAK5B,IAAMqS,GACLA,EAAK5M,eAAezF,KACtBgP,EAAOrP,KAAKwV,SAASnV,EAAIyG,GACrB6M,EAAOtE,IACTpN,EAAM+F,KAAKqH,GAOjB,KAFArP,KAAK0V,MAAMzT,EAAOwT,GAEbjQ,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IACvC2P,EAAI3P,GAAKvD,EAAMuD,GAAGxF,KAAK6S,cAKzB,KAAKxS,IAAMqS,GACLA,EAAK5M,eAAezF,KACtBgP,EAAOrP,KAAKwV,SAASnV,EAAIyG,GACrB6M,EAAOtE,IACT8F,EAAInN,KAAKqH,EAAKrP,KAAK6S,gBAQ3B,IAAI4C,EAAO,CAETxT,IACA,KAAK5B,IAAMqS,GACLA,EAAK5M,eAAezF,IACtB4B,EAAM+F,KAAK0K,EAAKrS,GAMpB,KAFAL,KAAK0V,MAAMzT,EAAOwT,GAEbjQ,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IACvC2P,EAAI3P,GAAKvD,EAAMuD,GAAGxF,KAAK6S,cAKzB,KAAKxS,IAAMqS,GACLA,EAAK5M,eAAezF,KACtBgP,EAAOqD,EAAKrS,GACZ8U,EAAInN,KAAKqH,EAAKrP,KAAK6S,WAM3B,OAAOsC,IAOTtU,EAAQsS,UAAU2C,WAAa,WAC7B,MAAO9V,OAaTa,EAAQsS,UAAU9K,QAAU,SAAUC,EAAUmG,GAC9C,GAGIY,GACAhP,EAJAsT,EAASlF,GAAWA,EAAQkF,OAC5B7M,EAAO2H,GAAWA,EAAQ3H,MAAQ9G,KAAK2S,SAAS7L,KAChD4L,EAAO1S,KAAK4S,KAIhB,IAAInE,GAAWA,EAAQgH,MAIrB,IAAK,GAFDxT,GAAQjC,KAAKkV,IAAIzG,GAEZjJ,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IAC3C6J,EAAOpN,EAAMuD,GACbnF,EAAKgP,EAAKrP,KAAK6S,UACfvK,EAAS+G,EAAMhP,OAKjB,KAAKA,IAAMqS,GACLA,EAAK5M,eAAezF,KACtBgP,EAAOrP,KAAKwV,SAASnV,EAAIyG,KACpB6M,GAAUA,EAAOtE,KACpB/G,EAAS+G,EAAMhP,KAkBzBQ,EAAQsS,UAAU9F,IAAM,SAAU/E,EAAUmG,GAC1C,GAIIY,GAJAsE,EAASlF,GAAWA,EAAQkF,OAC5B7M,EAAO2H,GAAWA,EAAQ3H,MAAQ9G,KAAK2S,SAAS7L,KAChDiP,KACArD,EAAO1S,KAAK4S,KAIhB,KAAK,GAAIvS,KAAMqS,GACTA,EAAK5M,eAAezF,KACtBgP,EAAOrP,KAAKwV,SAASnV,EAAIyG,KACpB6M,GAAUA,EAAOtE,KACpB0G,EAAY/N,KAAKM,EAAS+G,EAAMhP,IAUtC,OAJIoO,IAAWA,EAAQgH,OACrBzV,KAAK0V,MAAMK,EAAatH,EAAQgH,OAG3BM,GAUTlV,EAAQsS,UAAUwC,cAAgB,SAAUtG,EAAMnB,GAChD,GAAI8H,KAEJ,KAAK,GAAIlH,KAASO,GACZA,EAAKvJ,eAAegJ,IAAoC,IAAzBZ,EAAOvH,QAAQmI,KAChDkH,EAAalH,GAASO,EAAKP,GAI/B,OAAOkH,IASTnV,EAAQsS,UAAUuC,MAAQ,SAAUzT,EAAOwT,GACzC,GAAI9U,EAAKwD,SAASsR,GAAQ,CAExB,GAAIQ,GAAOR,CACXxT,GAAMiU,KAAK,SAAU3Q,EAAGa,GACtB,GAAI+P,GAAK5Q,EAAE0Q,GACPG,EAAKhQ,EAAE6P,EACX,OAAQE,GAAKC,EAAM,EAAWA,EAALD,EAAW,GAAK,QAGxC,CAAA,GAAqB,kBAAVV,GAOd,KAAM,IAAIpP,WAAU,uCALpBpE,GAAMiU,KAAKT,KAgBf5U,EAAQsS,UAAUkD,OAAS,SAAUhW,EAAI0T,GACvC,GACIvO,GAAGC,EAAK6Q,EADRC,IAGJ,IAAItQ,MAAMC,QAAQ7F,GAChB,IAAKmF,EAAI,EAAGC,EAAMpF,EAAGsF,OAAYF,EAAJD,EAASA,IACpC8Q,EAAYtW,KAAKwW,QAAQnW,EAAGmF,IACX,MAAb8Q,GACFC,EAAWvO,KAAKsO,OAKpBA,GAAYtW,KAAKwW,QAAQnW,GACR,MAAbiW,GACFC,EAAWvO,KAAKsO,EAQpB,OAJIC,GAAW5Q,QACb3F,KAAK6T,SAAS,UAAW5R,MAAOsU,GAAaxC,GAGxCwC,GAST1V,EAAQsS,UAAUqD,QAAU,SAAUnW,GACpC,GAAIM,EAAKqD,SAAS3D,IAAOM,EAAKwD,SAAS9D,IACrC,GAAIL,KAAK4S,MAAMvS,GAEb,aADOL,MAAK4S,MAAMvS,GACXA,MAGN,IAAIA,YAAckG,QAAQ,CAC7B,GAAIgP,GAASlV,EAAGL,KAAK6S,SACrB,IAAI0C,GAAUvV,KAAK4S,MAAM2C,GAEvB,aADOvV,MAAK4S,MAAM2C,GACXA,EAGX,MAAO,OAQT1U,EAAQsS,UAAUsD,MAAQ,SAAU1C,GAClC,GAAIoB,GAAM5O,OAAO6G,KAAKpN,KAAK4S,MAM3B,OAJA5S,MAAK4S,SAEL5S,KAAK6T,SAAS,UAAW5R,MAAOkT,GAAMpB,GAE/BoB,GAQTtU,EAAQsS,UAAUzG,IAAM,SAAUoC,GAChC,GAAI4D,GAAO1S,KAAK4S,MACZlG,EAAM,KACNgK,EAAW,IAEf,KAAK,GAAIrW,KAAMqS,GACb,GAAIA,EAAK5M,eAAezF,GAAK,CAC3B,GAAIgP,GAAOqD,EAAKrS,GACZsW,EAAYtH,EAAKP,EACJ,OAAb6H,KAAuBjK,GAAOiK,EAAYD,KAC5ChK,EAAM2C,EACNqH,EAAWC,GAKjB,MAAOjK,IAQT7L,EAAQsS,UAAUrH,IAAM,SAAUgD,GAChC
,GAAI4D,GAAO1S,KAAK4S,MACZ9G,EAAM,KACN8K,EAAW,IAEf,KAAK,GAAIvW,KAAMqS,GACb,GAAIA,EAAK5M,eAAezF,GAAK,CAC3B,GAAIgP,GAAOqD,EAAKrS,GACZsW,EAAYtH,EAAKP,EACJ,OAAb6H,KAAuB7K,GAAmB8K,EAAZD,KAChC7K,EAAMuD,EACNuH,EAAWD,GAKjB,MAAO7K,IAUTjL,EAAQsS,UAAU0D,SAAW,SAAU/H,GACrC,GAIItJ,GAJAkN,EAAO1S,KAAK4S,MACZkE,KACAC,EAAY/W,KAAK2S,SAAS7L,MAAQ9G,KAAK2S,SAAS7L,KAAKgI,IAAU,KAC/DkI,EAAQ,CAGZ,KAAK,GAAInR,KAAQ6M,GACf,GAAIA,EAAK5M,eAAeD,GAAO,CAC7B,GAAIwJ,GAAOqD,EAAK7M,GACZwB,EAAQgI,EAAKP,GACbmI,GAAS,CACb,KAAKzR,EAAI,EAAOwR,EAAJxR,EAAWA,IACrB,GAAIsR,EAAOtR,IAAM6B,EAAO,CACtB4P,GAAS,CACT,OAGCA,GAAqBzQ,SAAVa,IACdyP,EAAOE,GAAS3P,EAChB2P,KAKN,GAAID,EACF,IAAKvR,EAAI,EAAGA,EAAIsR,EAAOnR,OAAQH,IAC7BsR,EAAOtR,GAAK7E,EAAKkG,QAAQiQ,EAAOtR,GAAIuR,EAIxC,OAAOD,IASTjW,EAAQsS,UAAUiB,SAAW,SAAU/E,GACrC,GAAIhP,GAAKgP,EAAKrP,KAAK6S,SAEnB,IAAUrM,QAANnG,GAEF,GAAIL,KAAK4S,MAAMvS,GAEb,KAAM,IAAIuD,OAAM,iCAAmCvD,EAAK,uBAK1DA,GAAKM,EAAKqE,aACVqK,EAAKrP,KAAK6S,UAAYxS,CAGxB,IAAIsM,KACJ,KAAK,GAAImC,KAASO,GAChB,GAAIA,EAAKvJ,eAAegJ,GAAQ,CAC9B,GAAIiI,GAAY/W,KAAK+S,MAAMjE,EAC3BnC,GAAEmC,GAASnO,EAAKkG,QAAQwI,EAAKP,GAAQiI,GAKzC,MAFA/W,MAAK4S,MAAMvS,GAAMsM,EAEVtM,GAUTQ,EAAQsS,UAAUqC,SAAW,SAAUnV,EAAI6W,GACzC,GAAIpI,GAAOzH,EAGP8P,EAAMnX,KAAK4S,MAAMvS,EACrB,KAAK8W,EACH,MAAO,KAIT,IAAIC,KACJ,IAAIF,EACF,IAAKpI,IAASqI,GACRA,EAAIrR,eAAegJ,KACrBzH,EAAQ8P,EAAIrI,GACZsI,EAAUtI,GAASnO,EAAKkG,QAAQQ,EAAO6P,EAAMpI,SAMjD,KAAKA,IAASqI,GACRA,EAAIrR,eAAegJ,KACrBzH,EAAQ8P,EAAIrI,GACZsI,EAAUtI,GAASzH,EAIzB,OAAO+P,IAWTvW,EAAQsS,UAAU8B,YAAc,SAAU5F,GACxC,GAAIhP,GAAKgP,EAAKrP,KAAK6S,SACnB,IAAUrM,QAANnG,EACF,KAAM,IAAIuD,OAAM,6CAA+CyT,KAAKC,UAAUjI,GAAQ,IAExF,IAAI1C,GAAI3M,KAAK4S,MAAMvS,EACnB,KAAKsM,EAEH,KAAM,IAAI/I,OAAM,uCAAyCvD,EAAK,SAIhE,KAAK,GAAIyO,KAASO,GAChB,GAAIA,EAAKvJ,eAAegJ,GAAQ,CAC9B,GAAIiI,GAAY/W,KAAK+S,MAAMjE,EAC3BnC,GAAEmC,GAASnO,EAAKkG,QAAQwI,EAAKP,GAAQiI,GAIzC,MAAO1W,IASTQ,EAAQsS,UAAUmB,gBAAkB,SAAUiD,GAE5C,IAAK,GADDlD,MACKK,EAAM,EAAGC,EAAO4C,EAAUC,qBAA4B7C,EAAND,EAAYA,IACnEL,EAAQK,GAAO6C,EAAUE,YAAY/C,IAAQ6C,EAAUG,eAAehD,EAExE,OAAOL,IAUTxT,EAAQsS,UAAUyC,WAAa,SAAU2B,EAAWlD,EAAShF,GAG3D,IAAK,GAFDkF,GAAMgD,EAAUI,SAEXjD,EAAM,EAAGC,EAAON,EAAQ1O,OAAcgP,EAAND,EAAYA,IAAO,CAC1D,GAAI5F,GAAQuF,EAAQK,EACpB6C,GAAUK,SAASrD,EAAKG,EAAKrF,EAAKP,MAItCjP,EAAOD,QAAUiB,GAKb,SAAShB,EAAQD,EAASM,GAe9B,QAASY,GAAU4R,EAAMjE,GACvBzO,KAAK4S,MAAQ,KACb5S,KAAK6X,QACL7X,KAAK2S,SAAWlE,MAChBzO,KAAK6S,SAAW,KAChB7S,KAAKgT,eAEL,IAAImB,GAAKnU,IACTA,MAAK8I,SAAW,WACdqL,EAAG2D,SAASC,MAAM5D,EAAIzO,YAGxB1F,KAAKgY,QAAQtF,GAzBf,GAAI/R,GAAOT,EAAoB,GAC3BW,EAAUX,EAAoB,EAkClCY,GAASqS,UAAU6E,QAAU,SAAUtF,GACrC,GAAIyC,GAAK3P,EAAGC,CAEZ,IAAIzF,KAAK4S,MAAO,CAEV5S,KAAK4S,MAAMgB,aACb5T,KAAK4S,MAAMgB,YAAY,IAAK5T,KAAK8I,UAInCqM,IACA,KAAK,GAAI9U,KAAML,MAAK6X,KACd7X,KAAK6X,KAAK/R,eAAezF,IAC3B8U,EAAInN,KAAK3H,EAGbL,MAAK6X,QACL7X,KAAK6T,SAAS,UAAW5R,MAAOkT,IAKlC,GAFAnV,KAAK4S,MAAQF,EAET1S,KAAK4S,MAAO,CAQd,IANA5S,KAAK6S,SAAW7S,KAAK2S,SAASG,SACzB9S,KAAK4S,OAAS5S,KAAK4S,MAAMnE,SAAWzO,KAAK4S,MAAMnE,QAAQqE,SACxD,KAGJqC,EAAMnV,KAAK4S,MAAMiD,QAAQlC,OAAQ3T,KAAK2S,UAAY3S,KAAK2S,SAASgB,SAC3DnO,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrCnF,EAAK8U,EAAI3P,GACTxF,KAAK6X,KAAKxX,IAAM,CAElBL,MAAK6T,SAAS,OAAQ5R,MAAOkT,IAGzBnV,KAAK4S,MAAMW,IACbvT,KAAK4S,MAAMW,GAAG,IAAKvT,KAAK8I,YAuC9BhI,EAASqS,UAAU+B,IAAM,WACvB,GAGIC,GAAK1G,EAASiE,EAHdyB,EAAKnU,KAILoV,EAAYzU,EAAKwG,QAAQzB,UAAU,GACtB,WAAb0P,GAAsC,UAAbA,GAAsC,SAAbA,GAEpDD,EAAMzP,UAAU,GAChB+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,KAIjB+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,GAInB,IAAIuS,GAActX,EAAK2E,UAAWtF,KAAK2S,SAAUlE,EAG7CzO,MAAK2S,SAASgB,QAAUlF,GAAWA,EAAQkF,SAC7CsE,EAAYtE,OAAS,SAAUtE,GAC7B,MAAO8E,GA
AGxB,SAASgB,OAAOtE,IAASZ,EAAQkF,OAAOtE,IAKtD,IAAI6I,KAOJ,OANW1R,SAAP2O,GACF+C,EAAalQ,KAAKmN,GAEpB+C,EAAalQ,KAAKiQ,GAClBC,EAAalQ,KAAK0K,GAEX1S,KAAK4S,OAAS5S,KAAK4S,MAAMsC,IAAI6C,MAAM/X,KAAK4S,MAAOsF,IAWxDpX,EAASqS,UAAU0C,OAAS,SAAUpH,GACpC,GAAI0G,EAEJ,IAAInV,KAAK4S,MAAO,CACd,GACIe,GADAwE,EAAgBnY,KAAK2S,SAASgB,MAK9BA,GAFAlF,GAAWA,EAAQkF,OACjBwE,EACO,SAAU9I,GACjB,MAAO8I,GAAc9I,IAASZ,EAAQkF,OAAOtE,IAItCZ,EAAQkF,OAIVwE,EAGXhD,EAAMnV,KAAK4S,MAAMiD,QACflC,OAAQA,EACR8B,MAAOhH,GAAWA,EAAQgH,YAI5BN,KAGF,OAAOA,IAQTrU,EAASqS,UAAU2C,WAAa,WAE9B,IADA,GAAIsC,GAAUpY,KACPoY,YAAmBtX,IACxBsX,EAAUA,EAAQxF,KAEpB,OAAOwF,IAAW,MAYpBtX,EAASqS,UAAU2E,SAAW,SAAUxO,EAAOwK,EAAQC,GACrD,GAAIvO,GAAGC,EAAKpF,EAAIgP,EACZ8F,EAAMrB,GAAUA,EAAO7R,MACvByQ,EAAO1S,KAAK4S,MACZyF,KACAC,KACAC,IAEJ,IAAIpD,GAAOzC,EAAM,CACf,OAAQpJ,GACN,IAAK,MAEH,IAAK9D,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrCnF,EAAK8U,EAAI3P,GACT6J,EAAOrP,KAAKkV,IAAI7U,GACZgP,IACFrP,KAAK6X,KAAKxX,IAAM,EAChBgY,EAAMrQ,KAAK3H,GAIf,MAEF,KAAK,SAGH,IAAKmF,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrCnF,EAAK8U,EAAI3P,GACT6J,EAAOrP,KAAKkV,IAAI7U,GAEZgP,EACErP,KAAK6X,KAAKxX,GACZiY,EAAQtQ,KAAK3H,IAGbL,KAAK6X,KAAKxX,IAAM,EAChBgY,EAAMrQ,KAAK3H,IAITL,KAAK6X,KAAKxX,WACLL,MAAK6X,KAAKxX,GACjBkY,EAAQvQ,KAAK3H,GAQnB,MAEF,KAAK,SAEH,IAAKmF,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrCnF,EAAK8U,EAAI3P,GACLxF,KAAK6X,KAAKxX,WACLL,MAAK6X,KAAKxX,GACjBkY,EAAQvQ,KAAK3H,IAOjBgY,EAAM1S,QACR3F,KAAK6T,SAAS,OAAQ5R,MAAOoW,GAAQtE,GAEnCuE,EAAQ3S,QACV3F,KAAK6T,SAAS,UAAW5R,MAAOqW,GAAUvE,GAExCwE,EAAQ5S,QACV3F,KAAK6T,SAAS,UAAW5R,MAAOsW,GAAUxE,KAMhDjT,EAASqS,UAAUI,GAAK1S,EAAQsS,UAAUI,GAC1CzS,EAASqS,UAAUO,IAAM7S,EAAQsS,UAAUO,IAC3C5S,EAASqS,UAAUU,SAAWhT,EAAQsS,UAAUU,SAGhD/S,EAASqS,UAAUM,UAAY3S,EAASqS,UAAUI,GAClDzS,EAASqS,UAAUS,YAAc9S,EAASqS,UAAUO,IAEpD7T,EAAOD,QAAUkB,GAIb,SAASjB,GAeb,QAASkB,GAAM0N,GAEbzO,KAAKwY,MAAQ,KACbxY,KAAK0M,IAAM+L,IAGXzY,KAAKqT,UACLrT,KAAK0Y,SAAW,KAChB1Y,KAAK2Y,UAAY,KAEjB3Y,KAAKkT,WAAWzE,GAgBlB1N,EAAMoS,UAAUD,WAAa,SAAUzE,GACjCA,GAAoC,mBAAlBA,GAAQ+J,QAC5BxY,KAAKwY,MAAQ/J,EAAQ+J,OAEnB/J,GAAkC,mBAAhBA,GAAQ/B,MAC5B1M,KAAK0M,IAAM+B,EAAQ/B,KAGrB1M,KAAK4Y,kBAsBP7X,EAAMuE,OAAS,SAAUrB,EAAQwK,GAC/B,GAAI2E,GAAQ,GAAIrS,GAAM0N,EAEtB,IAAqBjI,SAAjBvC,EAAO4U,MACT,KAAM,IAAIjV,OAAM,6CAElBK,GAAO4U,MAAQ,WACbzF,EAAMyF,QAGR,IAAIC,KACF7C,KAAM,QACN8C,SAAUvS,QAGZ,IAAIiI,GAAWA,EAAQjE,QACrB,IAAK,GAAIhF,GAAI,EAAGA,EAAIiJ,EAAQjE,QAAQ7E,OAAQH,IAAK,CAC/C,GAAIyQ,GAAOxH,EAAQjE,QAAQhF,EAC3BsT,GAAQ9Q,MACNiO,KAAMA,EACN8C,SAAU9U,EAAOgS,KAEnB7C,EAAM5I,QAAQvG,EAAQgS,GAS1B,MALA7C,GAAMuF,WACJ1U,OAAQA,EACR6U,QAASA,GAGJ1F,GAOTrS,EAAMoS,UAAUG,QAAU,WAGxB,GAFAtT,KAAK6Y,QAED7Y,KAAK2Y,UAAW,CAGlB,IAAK,GAFD1U,GAASjE,KAAK2Y,UAAU1U,OACxB6U,EAAU9Y,KAAK2Y,UAAUG,QACpBtT,EAAI,EAAGA,EAAIsT,EAAQnT,OAAQH,IAAK,CACvC,GAAIwT,GAASF,EAAQtT,EACjBwT,GAAOD,SACT9U,EAAO+U,EAAO/C,MAAQ+C,EAAOD,eAGtB9U,GAAO+U,EAAO/C,MAGzBjW,KAAK2Y,UAAY,OASrB5X,EAAMoS,UAAU3I,QAAU,SAASvG,EAAQ+U,GACzC,GAAI7E,GAAKnU,KACL+Y,EAAW9U,EAAO+U,EACtB,KAAKD,EACH,KAAM,IAAInV,OAAM,UAAYoV,EAAS,aAGvC/U,GAAO+U,GAAU,WAGf,IAAK,GADDC,MACKzT,EAAI,EAAGA,EAAIE,UAAUC,OAAQH,IACpCyT,EAAKzT,GAAKE,UAAUF,EAItB2O,GAAGf,OACD6F,KAAMA,EACNC,GAAIH,EACJI,QAASnZ,SASfe,EAAMoS,UAAUC,MAAQ,SAASgG,GAE7BpZ,KAAKqT,OAAOrL,KADO,kBAAVoR,IACSF,GAAIE,GAGLA,GAGnBpZ,KAAK4Y,kBAOP7X,EAAMoS,UAAUyF,eAAiB,WAQ/B,GANI5Y,KAAKqT,OAAO1N,OAAS3F,KAAK0M,KAC5B1M,KAAK6Y,QAIPQ,aAAarZ,KAAK0Y,UACd1Y,KAAKoT,MAAMzN,OAAS,GAA2B,gBAAf3F,MAAKwY,MAAoB,CAC3D,GAAIrE,GAAKnU,IACTA,MAAK0Y,SAAWY,WAAW,WACzBnF,EAAG0E,SACF7Y,KAAKwY,SAOZzX,EAAMoS,UAAU0F,MAAQ,WACtB,KAAO7Y,KAAKqT,OAAO1N,OAAS,GAAG,CAC7B,GAAIyT,GAAQpZ,KAAKqT,OAAO/B,OACxB8H,GAAMF,GAAGnB,M
[Omitted: base64-VLQ source-map "mappings" data for a vendored, minified JavaScript asset bundled with this patch series; the encoded content is machine-generated and not human-editable.]
63B,qBAAqB9C,EAAMI,EAQnC,KAAK,GANDnlB,GAAQ/L,EAAO8wB,EAAKc,MAAM7lB,OAC1BC,EAAMhM,EAAO8wB,EAAKc,MAAM5lB,KAExBioB,EAAcnD,EAAKc,MAAM5lB,IAAM8kB,EAAKc,MAAM7lB,MAC1CmoB,EAAYD,EAAanD,EAAKC,SAASiD,gBAAgBtlB,MAElD/M,EAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IACtC,GAA8BgB,SAA1BuuB,EAAYvvB,GAAGkyB,OAAsB,CACvC,GAAIM,GAAYn0B,EAAOkxB,EAAYvvB,GAAGoK,OAClCqoB,EAAUp0B,EAAOkxB,EAAYvvB,GAAGqK,IAEpC,IAAoB,gBAAhBmoB,EAAUE,GACZ,KAAM,IAAIt0B,OAAM,qCAAuCmxB,EAAYvvB,GAAGoK,MAExE,IAAkB,gBAAdqoB,EAAQC,GACV,KAAM,IAAIt0B,OAAM,mCAAqCmxB,EAAYvvB,GAAGqK,IAGtE,IAAIC,GAAWmoB,EAAUD,CACzB,IAAIloB,GAAY,EAAIioB,EAAW,CAE7B,GAAIpO,GAAS,EACTwO,EAAWtoB,EAAIuoB,OACnB,QAAQrD,EAAYvvB,GAAGkyB,QACrB,IAAK,QACCM,EAAUK,OAASJ,EAAQI,QAC7B1O,EAAS,GAEXqO,EAAUM,UAAU1oB,EAAM0oB,aAC1BN,EAAUO,KAAK3oB,EAAM2oB,QACrBP,EAAU3M,SAAS,EAAE,QAErB4M,EAAQK,UAAU1oB,EAAM0oB,aACxBL,EAAQM,KAAK3oB,EAAM2oB,QACnBN,EAAQ5M,SAAS,EAAI1B,EAAO,QAE5BwO,EAASllB,IAAI,EAAG,QAChB,MACF,KAAK,SACH,GAAIulB,GAAYP,EAAQ5L,KAAK2L,EAAU,QACnCK,EAAML,EAAUK,KAGpBL,GAAUS,KAAK7oB,EAAM6oB,QACrBT,EAAUU,MAAM9oB,EAAM8oB,SACtBV,EAAUO,KAAK3oB,EAAM2oB,QACrBN,EAAUD,EAAUI,QAGpBJ,EAAUK,IAAIA,GACdJ,EAAQI,IAAIA,GACZJ,EAAQhlB,IAAIulB,EAAU,QAEtBR,EAAU3M,SAAS,EAAE,SACrB4M,EAAQ5M,SAAS,EAAE,SAEnB8M,EAASllB,IAAI,EAAG,QAChB,MACF,KAAK,UACC+kB,EAAUU,SAAWT,EAAQS,UAC/B/O,EAAS,GAEXqO,EAAUU,MAAM9oB,EAAM8oB,SACtBV,EAAUO,KAAK3oB,EAAM2oB,QACrBP,EAAU3M,SAAS,EAAE,UAErB4M,EAAQS,MAAM9oB,EAAM8oB,SACpBT,EAAQM,KAAK3oB,EAAM2oB,QACnBN,EAAQ5M,SAAS,EAAE,UACnB4M,EAAQhlB,IAAI0W,EAAO,UAEnBwO,EAASllB,IAAI,EAAG,SAChB,MACF,KAAK,SACC+kB,EAAUO,QAAUN,EAAQM,SAC9B5O,EAAS,GAEXqO,EAAUO,KAAK3oB,EAAM2oB,QACrBP,EAAU3M,SAAS,EAAE,SACrB4M,EAAQM,KAAK3oB,EAAM2oB,QACnBN,EAAQ5M,SAAS,EAAE,SACnB4M,EAAQhlB,IAAI0W,EAAO,SAEnBwO,EAASllB,IAAI,EAAG,QAChB,MACF,SAEE,WADA0lB,SAAQhF,IAAI,2EAA4EoB,EAAYvvB,GAAGkyB,QAG3G,KAAmBS,EAAZH,GAEL,OADArD,EAAKI,YAAY/sB,MAAM4H,MAAOooB,EAAUhxB,UAAW6I,IAAKooB,EAAQjxB,YACxD+tB,EAAYvvB,GAAGkyB,QACrB,IAAK,QACHM,EAAU/kB,IAAI,EAAG,QACjBglB,EAAQhlB,IAAI,EAAG,OACf,MACF,KAAK,SACH+kB,EAAU/kB,IAAI,EAAG,SACjBglB,EAAQhlB,IAAI,EAAG,QACf,MACF,KAAK,UACH+kB,EAAU/kB,IAAI,EAAG,UACjBglB,EAAQhlB,IAAI,EAAG,SACf,MACF,KAAK,SACH+kB,EAAU/kB,IAAI,EAAG,KACjBglB,EAAQhlB,IAAI,EAAG,IACf,MACF,SAEE,WADA0lB,SAAQhF,IAAI,2EAA4EoB,EAAYvvB,GAAGkyB,QAI7G/C,EAAKI,YAAY/sB,MAAM4H,MAAOooB,EAAUhxB,UAAW6I,IAAKooB,EAAQjxB,aAKtEpH,EAAQg5B,iBAAiBjE,EAEzB,IAAIkE,GAAcj5B,EAAQk5B,SAASnE,EAAKc,MAAM7lB,MAAO+kB,EAAKI,aACtDgE,EAAYn5B,EAAQk5B,SAASnE,EAAKc,MAAM5lB,IAAI8kB,EAAKI,aACjDiE,EAAarE,EAAKc,MAAM7lB,MACxBqpB,EAAWtE,EAAKc,MAAM5lB,GACA,IAAtBgpB,EAAYK,SAAiBF,EAAwC,GAA3BrE,EAAKc,MAAM0D,aAAuBN,EAAYb,UAAY,EAAIa,EAAYZ,QAAU,GAC1G,GAApBc,EAAUG,SAAmBD,EAAsC,GAAzBtE,EAAKc,MAAM2D,WAAuBL,EAAUf,UAAY,EAAMe,EAAUd,QAAU,IACtG,GAAtBY,EAAYK,QAAsC,GAApBH,EAAUG,SAC1CvE,EAAKc,MAAM4D,YAAYL,EAAYC,KAYzCr5B,EAAQg5B,iBAAmB,SAASjE,GAGlC,IAAK,GAFDI,GAAcJ,EAAKI,YACnBuE,KACK9zB,EAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IACtC,IAAK,GAAIomB,GAAI,EAAGA,EAAImJ,EAAYpvB,OAAQimB,IAClCpmB,GAAKomB,GAA8B,GAAzBmJ,EAAYnJ,GAAGvV,QAA2C,GAAzB0e,EAAYvvB,GAAG6Q,SAExD0e,EAAYnJ,GAAGhc,OAASmlB,EAAYvvB,GAAGoK,OAASmlB,EAAYnJ,GAAG/b,KAAOklB,EAAYvvB,GAAGqK,IACvFklB,EAAYnJ,GAAGvV,QAAS,EAGjB0e,EAAYnJ,GAAGhc,OAASmlB,EAAYvvB,GAAGoK,OAASmlB,EAAYnJ,GAAGhc,OAASmlB,EAAYvvB,GAAGqK,KAC9FklB,EAAYvvB,GAAGqK,IAAMklB,EAAYnJ,GAAG/b,IACpCklB,EAAYnJ,GAAGvV,QAAS,GAGjB0e,EAAYnJ,GAAG/b,KAAOklB,EAAYvvB,GAAGoK,OAASmlB,EAAYnJ,GAAG/b,KAAOklB,EAAYvvB,GAAGqK,MAC1FklB,EAAYvvB,GAAGoK,MAAQmlB,EAAYnJ,GAAGhc,MACtCmlB,EAAYnJ,GAAGvV,QAAS,GAMhC,KAAK,GAAI7Q,GAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAClCuvB,EAAYvvB,GAAG6Q,UAAW,GAC5BijB,EAAUtxB,KAAK+sB,EAAYvvB,GAI/BmvB,GAAKI,YAAcuE,EACnB3E,EA
AKI,YAAY7e,KAAK,SAAU3Q,EAAGa,GACjC,MAAOb,GAAEqK,MAAQxJ,EAAEwJ,SAIvBhQ,EAAQ25B,WAAa,SAASC,GAC5B,IAAK,GAAIh0B,GAAG,EAAGA,EAAIg0B,EAAM7zB,OAAQH,IAC/BmzB,QAAQhF,IAAInuB,EAAG,GAAIlB,MAAKk1B,EAAMh0B,GAAGoK,OAAO,GAAItL,MAAKk1B,EAAMh0B,GAAGqK,KAAM2pB,EAAMh0B,GAAGoK,MAAO4pB,EAAMh0B,GAAGqK,IAAK2pB,EAAMh0B,GAAG6Q,SAS3GzW,EAAQ65B,oBAAsB,SAASC,EAAUC,GAG/C,IAAK,GAFDC,IAAe,EACfC,EAAeH,EAASI,QAAQ9yB,UAC3BxB,EAAI,EAAGA,EAAIk0B,EAAS3E,YAAYpvB,OAAQH,IAAK,CACpD,GAAIwyB,GAAY0B,EAAS3E,YAAYvvB,GAAGoK,MACpCqoB,EAAUyB,EAAS3E,YAAYvvB,GAAGqK,GACtC,IAAIgqB,GAAgB7B,GAA4BC,EAAf4B,EAAwB,CACvDD,GAAe,CACf,QAIJ,GAAoB,GAAhBA,GAAwBC,EAAeH,EAASvG,KAAKnsB,WAAa6yB,GAAgBF,EAAc,CAClG,GAAIlqB,GAAY5L,EAAO81B,GACnBI,EAAWl2B,EAAOo0B,EAElBxoB,GAAU8oB,QAAUwB,EAASxB,OAASmB,EAASM,cAAe,EACzDvqB,EAAUipB,SAAWqB,EAASrB,QAAUgB,EAASO,eAAgB,EACjExqB,EAAU6oB,aAAeyB,EAASzB,cAAcoB,EAASQ,aAAc,GAEhFR,EAASI,QAAUC,EAAS7yB,WAmChCtH,EAAQq1B,SAAW,SAASiB,EAAMiE,EAAM5nB,GACtC,GAAoC,GAAhC2jB,EAAKvB,KAAKI,YAAYpvB,OAAa,CACrC,GAAIy0B,GAAalE,EAAKT,MAAM2E,WAAW7nB,EACvC,QAAQ4nB,EAAKnzB,UAAYozB,EAAWzQ,QAAUyQ,EAAWnd,MAGzD,GAAIic,GAASt5B,EAAQk5B,SAASqB,EAAMjE,EAAKvB,KAAKI,YACzB,IAAjBmE,EAAOA,SACTiB,EAAOjB,EAAOlB,UAGhB,IAAIloB,GAAWlQ,EAAQy6B,yBAAyBnE,EAAKvB,KAAKI,YAAamB,EAAKT,MAAM7lB,MAAOsmB,EAAKT,MAAM5lB,IACpGsqB,GAAOv6B,EAAQ06B,qBAAqBpE,EAAKvB,KAAKI,YAAamB,EAAKT,MAAO0E,EAEvE,IAAIC,GAAalE,EAAKT,MAAM2E,WAAW7nB,EAAOzC,EAC9C,QAAQqqB,EAAKnzB,UAAYozB,EAAWzQ,QAAUyQ,EAAWnd,OAa7Drd,EAAQy1B,OAAS,SAASa,EAAMnkB,EAAGQ,GACjC,GAAoC,GAAhC2jB,EAAKvB,KAAKI,YAAYpvB,OAAa,CACrC,GAAIy0B,GAAalE,EAAKT,MAAM2E,WAAW7nB,EACvC,OAAO,IAAIjO,MAAKyN,EAAIqoB,EAAWnd,MAAQmd,EAAWzQ,QAGlD,GAAI4Q,GAAiB36B,EAAQy6B,yBAAyBnE,EAAKvB,KAAKI,YAAamB,EAAKT,MAAM7lB,MAAOsmB,EAAKT,MAAM5lB,KACtG2qB,EAAgBtE,EAAKT,MAAM5lB,IAAMqmB,EAAKT,MAAM7lB,MAAQ2qB,EACpDE,EAAkBD,EAAgBzoB,EAAIQ,EACtCmoB,EAA4B96B,EAAQ+6B,6BAA6BzE,EAAKvB,KAAKI,YAAamB,EAAKT,MAAOgF,GAEpGG,EAAU,GAAIt2B,MAAKo2B,EAA4BD,EAAkBvE,EAAKT,MAAM7lB,MAChF,OAAOgrB,IAYXh7B,EAAQy6B,yBAA2B,SAAStF,EAAanlB,EAAOC,GAE9D,IAAK,GADDC,GAAW,EACNtK,EAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAAK,CAC3C,GAAIwyB,GAAYjD,EAAYvvB,GAAGoK,MAC3BqoB,EAAUlD,EAAYvvB,GAAGqK,GAEzBmoB,IAAapoB,GAAmBC,EAAVooB,IACxBnoB,GAAYmoB,EAAUD,GAG1B,MAAOloB,IAWTlQ,EAAQ06B,qBAAuB,SAASvF,EAAaU,EAAO0E,GAG1D,MAFAA,GAAOt2B,EAAOs2B,GAAMjzB,SAASF,UAC7BmzB,GAAQv6B,EAAQi7B,wBAAwB9F,EAAYU,EAAM0E,IAI5Dv6B,EAAQi7B,wBAA0B,SAAS9F,EAAaU,EAAO0E,GAC7D,GAAIW,GAAa,CACjBX,GAAOt2B,EAAOs2B,GAAMjzB,SAASF,SAE7B,KAAK,GAAIxB,GAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAAK,CAC3C,GAAIwyB,GAAYjD,EAAYvvB,GAAGoK,MAC3BqoB,EAAUlD,EAAYvvB,GAAGqK,GAEzBmoB,IAAavC,EAAM7lB,OAASqoB,EAAUxC,EAAM5lB,KAC1CsqB,GAAQlC,IACV6C,GAAe7C,EAAUD,GAI/B,MAAO8C,IAWTl7B,EAAQ+6B,6BAA+B,SAAS5F,EAAaU,EAAOsF,GAKlE,IAAK,GAJDR,GAAiB,EACjBzqB,EAAW,EACXkrB,EAAgBvF,EAAM7lB,MAEjBpK,EAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAAK,CAC3C,GAAIwyB,GAAYjD,EAAYvvB,GAAGoK,MAC3BqoB,EAAUlD,EAAYvvB,GAAGqK,GAE7B,IAAImoB,GAAavC,EAAM7lB,OAASqoB,EAAUxC,EAAM5lB,IAAK,CAGnD,GAFAC,GAAYkoB,EAAYgD,EACxBA,EAAgB/C,EACZnoB,GAAYirB,EACd,KAGAR,IAAkBtC,EAAUD,GAKlC,MAAOuC,IAaT36B,EAAQq7B,mBAAqB,SAASlG,EAAaoF,EAAMe,EAAWC,GAClE,GAAIrC,GAAWl5B,EAAQk5B,SAASqB,EAAMpF,EACtC,OAAuB,IAAnB+D,EAASI,OACK,EAAZgC,EACuB,GAArBC,EACKrC,EAASd,WAAac,EAASb,QAAUkC,GAAQ,EAGjDrB,EAASd,UAAY,EAIL,GAArBmD,EACKrC,EAASb,SAAWkC,EAAOrB,EAASd,WAAa,EAGjDc,EAASb,QAAU,EAKvBkC,GAaXv6B,EAAQk5B,SAAW,SAASqB,EAAMpF,GAChC,IAAK,GAAIvvB,GAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAAK,CAC3C,GAAIwyB,GAAYjD,EAAYvvB,GAAGoK,MAC3BqoB,EAAUlD,EAAYvvB,GAAGqK,GAE7B,IAAIsqB,GAAQnC,GAAoBC,EAAPkC,EACvB,OAAQjB,QAAQ,EAAMlB,UAAWA,EAAWC,QAASA,GAIzD,OAAQiB,QAAQ,EAAOlB,UAAWA,EAAWC,QAASA,KAKpD,SAASp4B
,GA4Bb,QAAS+B,GAASgO,EAAOC,EAAKurB,EAAaC,EAAiBC,EAAaC,GAEvEv7B,KAAK85B,QAAU,EAEf95B,KAAKw7B,WAAY,EACjBx7B,KAAKy7B,UAAY,EACjBz7B,KAAKmoB,KAAO,EACZnoB,KAAKid,MAAQ,EAEbjd,KAAK07B,YACL17B,KAAK27B,UACL37B,KAAK47B,UAAY,EAEjB57B,KAAK67B,YAAc,EAAO,EAAM,EAAI,IACpC77B,KAAK87B,YAAc,IAAO,GAAM,EAAI,GAEpC97B,KAAKu7B,WAAaA,EAElBv7B,KAAKuzB,SAAS3jB,EAAOC,EAAKurB,EAAaC,EAAiBC,GAe1D15B,EAASuR,UAAUogB,SAAW,SAAS3jB,EAAOC,EAAKurB,EAAaC,EAAiBC,GAC/Et7B,KAAKkzB,OAA6B1sB,SAApB80B,EAAYxvB,IAAoB8D,EAAQ0rB,EAAYxvB,IAClE9L,KAAKmzB,KAA2B3sB,SAApB80B,EAAY5uB,IAAoBmD,EAAMyrB,EAAY5uB,IAE1D1M,KAAKkzB,QAAUlzB,KAAKmzB,OACtBnzB,KAAKkzB,QAAU,IACflzB,KAAKmzB,MAAQ,GAGO,GAAlBnzB,KAAKw7B,WACPx7B,KAAK+7B,eAAeX,EAAaC,GAGnCr7B,KAAKg8B,SAASV,IAOhB15B,EAASuR,UAAU4oB,eAAiB,SAASX,EAAaC,GAExD,GAAIhpB,GAAOrS,KAAKmzB,KAAOnzB,KAAKkzB,OACxB+I,EAAkB,IAAP5pB,EACX6pB,EAAmBd,GAAea,EAAWZ,GAC7Cc,EAAmBj3B,KAAKwoB,MAAMxoB,KAAKyuB,IAAIsI,GAAU/2B,KAAK0uB,MAEtDwI,EAAe,GACfC,EAAkBn3B,KAAK4uB,IAAI,GAAGqI,GAE9BvsB,EAAQ,CACW,GAAnBusB,IACFvsB,EAAQusB,EAIV,KAAK,GADDG,IAAgB,EACX92B,EAAIoK,EAAO1K,KAAK2lB,IAAIrlB,IAAMN,KAAK2lB,IAAIsR,GAAmB32B,IAAK,CAClE62B,EAAkBn3B,KAAK4uB,IAAI,GAAGtuB,EAC9B,KAAK,GAAIomB,GAAI,EAAGA,EAAI5rB,KAAK87B,WAAWn2B,OAAQimB,IAAK,CAC/C,GAAI2Q,GAAWF,EAAkBr8B,KAAK87B,WAAWlQ,EACjD,IAAI2Q,GAAYL,EAAkB,CAChCI,GAAgB,EAChBF,EAAexQ,CACf,QAGJ,GAAqB,GAAjB0Q,EACF,MAGJt8B,KAAKy7B,UAAYW,EACjBp8B,KAAKid,MAAQof,EACbr8B,KAAKmoB,KAAOkU,EAAkBr8B,KAAK87B,WAAWM,IAShDx6B,EAASuR,UAAU6oB,SAAW,SAASV,GACjB90B,SAAhB80B,IACFA,KAGF,IAAIkB,GAAgCh2B,SAApB80B,EAAYxvB,IAAoB9L,KAAKkzB,OAAuB,EAAblzB,KAAKid,MAAYjd,KAAK87B,WAAW97B,KAAKy7B,WAAcH,EAAYxvB,IAC3H2wB,EAA8Bj2B,SAApB80B,EAAY5uB,IAAoB1M,KAAKmzB,KAAQnzB,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,WAAcH,EAAY5uB,GAEvH1M,MAAK27B,UAAgCn1B,SAApB80B,EAAY5uB,IAAoB1M,KAAK08B,aAAaD,GAAWnB,EAAY5uB,IAC1F1M,KAAK07B,YAAkCl1B,SAApB80B,EAAYxvB,IAAoB9L,KAAK08B,aAAaF,GAAalB,EAAYxvB,IAGvE,GAAnB9L,KAAKu7B,aAAuBv7B,KAAK27B,UAAY37B,KAAK07B,aAAe17B,KAAKmoB,MAAQ,IAChFnoB,KAAK27B,WAAa37B,KAAK27B,UAAY37B,KAAKmoB,MAG1CnoB,KAAK47B,UAAY57B,KAAK08B,aAAaD,GAAWA,EAAUz8B,KAAK08B,aAAaF,GAAaA,EACvFx8B,KAAK28B,YAAc38B,KAAK27B,UAAY37B,KAAK07B,YAGzC17B,KAAK85B,QAAU95B,KAAK27B,WAGtB/5B,EAASuR,UAAUupB,aAAe,SAASr1B,GACzC,GAAIu1B,GAAUv1B,EAASA,GAASrH,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,WAClE,OAAIp0B,IAASrH,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,YAAc,GAAOz7B,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,WAC7FmB,EAAW58B,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,WAG7CmB,GASXh7B,EAASuR,UAAU0pB,QAAU,WAC3B,MAAQ78B,MAAK85B,SAAW95B,KAAK07B,aAM/B95B,EAASuR,UAAUkV,KAAO,WACxB,GAAIuJ,GAAO5xB,KAAK85B,OAChB95B,MAAK85B,SAAW95B,KAAKmoB,KAGjBnoB,KAAK85B,SAAWlI,IAClB5xB,KAAK85B,QAAU95B,KAAKmzB,OAOxBvxB,EAASuR,UAAU2pB,SAAW,WAC5B98B,KAAK85B,SAAW95B,KAAKmoB,KACrBnoB,KAAK27B,WAAa37B,KAAKmoB,KACvBnoB,KAAK28B,YAAc38B,KAAK27B,UAAY37B,KAAK07B,aAS3C95B,EAASuR,UAAUiV,WAAa,SAAS2U,GAEvC,GAAIjD,GAAW50B,KAAK2lB,IAAI7qB,KAAK85B,SAAW95B,KAAKmoB,KAAO,EAAK,EAAInoB,KAAK85B,QAC9D7F,EAAc,GAAK/vB,OAAO41B,GAAS7F,YAAY,EAGnD,IAAgBztB,SAAbu2B,GAA2Br4B,MAAMR,OAAO64B,KAqCzC,GAAgC,IAA5B9I,EAAYttB,QAAQ,MAA0C,IAA5BstB,EAAYttB,QAAQ,KAExD,IAAK,GAAInB,GAAIyuB,EAAYtuB,OAAS,EAAGH,EAAI,EAAGA,IAAK,CAC/C,GAAsB,KAAlByuB,EAAYzuB,GAGX,CAAA,GAAsB,KAAlByuB,EAAYzuB,IAA+B,KAAlByuB,EAAYzuB,GAAW,CACvDyuB,EAAcA,EAAYhpB,MAAM,EAAGzF,EACnC,OAGA,MAPAyuB,EAAcA,EAAYhpB,MAAM,EAAGzF,QAzCY,CAErD,GAAIw3B,GAAM,GACN70B,EAAQ8rB,EAAYttB,QAAQ,IAoBhC,IAnBY,IAATwB,IAED60B,EAAM/I,EAAYhpB,MAAM9C,GAExB8rB,EAAcA,EAAYhpB,MAAM,EAAG9C,IAErCA,EAAQjD,KAAKwH,IAAIunB,EAAYttB,QAAQ,KAAMstB,EAAYttB,QAAQ,MAClD,KAAVwB,GAEe,IAAb40B,IACD9I,GAAe,KAGjB9rB,EAAQ8rB,EAAYtuB,OAASo3B
,GAEV,IAAbA,IAEN50B,GAAS40B,EAAW,GAEnB50B,EAAQ8rB,EAAYtuB,OAErB,IAAI,GAAIs3B,GAAM90B,EAAQ8rB,EAAYtuB,OAAQs3B,EAAM,EAAGA,IACjDhJ,GAAe,QAKjBA,GAAcA,EAAYhpB,MAAM,EAAG9C,EAGrC8rB,IAAe+I,EAoBjB,MAAO/I,IAWTryB,EAASuR,UAAU6hB,KAAO,aAS1BpzB,EAASuR,UAAU+pB,QAAU,WAC3B,MAAQl9B,MAAK85B,SAAW95B,KAAKid,MAAQjd,KAAK67B,WAAW77B,KAAKy7B,aAAe,GAG3E57B,EAAOD,QAAUgC,GAKb,SAAS/B,EAAQD,EAASM,GAgB9B,QAAS2B,GAAM8yB,EAAMlmB,GACnB,GAAI0uB,GAAMt5B,IAASu5B,MAAM,GAAGC,QAAQ,GAAGC,QAAQ,GAAGC,aAAa,EAC/Dv9B,MAAK4P,MAAQutB,EAAI/E,QAAQnlB,IAAI,GAAI,QAAQjM,UACzChH,KAAK6P,IAAMstB,EAAI/E,QAAQnlB,IAAI,EAAG,QAAQjM,UAEtChH,KAAK20B,KAAOA,EACZ30B,KAAKw9B,gBAAkB,EACvBx9B,KAAKy9B,YAAc,EACnBz9B,KAAKm5B,cAAe,EACpBn5B,KAAKo5B,YAAa,EAGlBp5B,KAAKq0B,gBACHzkB,MAAO,KACPC,IAAK,KACLqrB,UAAW,aACXwC,UAAU,EACVC,UAAU,EACV7xB,IAAK,KACLY,IAAK,KACLkxB,QAAS,GACTC,QAAS,UAEX79B,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBAEpCr0B,KAAKgG,OACH83B,UAEF99B,KAAK+9B,aAAe,KAGpB/9B,KAAK20B,KAAKE,QAAQthB,GAAG,WAAYvT,KAAKg+B,aAAalJ,KAAK90B,OACxDA,KAAK20B,KAAKE,QAAQthB,GAAG,UAAYvT,KAAKi+B,QAAQnJ,KAAK90B,OACnDA,KAAK20B,KAAKE,QAAQthB,GAAG,SAAYvT,KAAKk+B,WAAWpJ,KAAK90B,OAGtDA,KAAK20B,KAAKE,QAAQthB,GAAG,QAASvT,KAAKm+B,QAAQrJ,KAAK90B,OAGhDA,KAAK20B,KAAKE,QAAQthB,GAAG,aAAcvT,KAAKo+B,cAActJ,KAAK90B,OAG3DA,KAAK20B,KAAKE,QAAQthB,GAAG,QAASvT,KAAKq+B,SAASvJ,KAAK90B,OACjDA,KAAK20B,KAAKE,QAAQthB,GAAG,QAASvT,KAAKs+B,SAASxJ,KAAK90B,OAEjDA,KAAKkT,WAAWzE,GAsClB,QAAS8vB,GAAmBrD,GAC1B,GAAiB,cAAbA,GAA0C,YAAbA,EAC/B,KAAM,IAAI70B,WAAU,sBAAwB60B,EAAY,yCAof5D,QAASsD,GAAYV,EAAOl1B,GAC1B,OACEmJ,EAAG+rB,EAAM/rB,EAAIpR,EAAK2G,gBAAgBsB,GAClCoJ,EAAG8rB,EAAM9rB,EAAIrR,EAAK+G,eAAekB,IA1lBrC,GAAIjI,GAAOT,EAAoB,GAE3B2D,GADa3D,EAAoB,IACxBA,EAAoB,KAC7BqC,EAAYrC,EAAoB,IAChCyB,EAAWzB,EAAoB,GA0DnC2B,GAAMsR,UAAY,GAAI5Q,GAkBtBV,EAAMsR,UAAUD,WAAa,SAAUzE,GACrC,GAAIA,EAAS,CAEX,GAAIP,IAAU,YAAa,MAAO,MAAO,UAAW,UAAW,WAAY,WAAY,WAAY,cACnGvN,GAAKoF,gBAAgBmI,EAAQlO,KAAKyO,QAASA,IAEvC,SAAWA,IAAW,OAASA,KAEjCzO,KAAKuzB,SAAS9kB,EAAQmB,MAAOnB,EAAQoB,OA4B3ChO,EAAMsR,UAAUogB,SAAW,SAAS3jB,EAAOC,EAAK2mB,EAASiI,GACnDA,KAAW,IACbA,GAAS,EAEX,IAAIvL,GAAkB1sB,QAAToJ,EAAqBjP,EAAKkG,QAAQ+I,EAAO,QAAQ5I,UAAY,KACtEmsB,EAAgB3sB,QAAPqJ,EAAqBlP,EAAKkG,QAAQgJ,EAAK,QAAQ7I,UAAc,IAG1E,IAFAhH,KAAK0+B,mBAEDlI,EAAS,CACX,GAAIriB,GAAKnU,KACL2+B,EAAY3+B,KAAK4P,MACjBgvB,EAAU5+B,KAAK6P,IACfC,EAA8B,gBAAZ0mB,GAAuBA,EAAU,IACnDqI,GAAW,GAAIv6B,OAAO0C,UACtB83B,GAAa,EAEbzW,EAAO,WACT,IAAKlU,EAAGnO,MAAM83B,MAAMiB,SAAU,CAC5B,GAAI5B,IAAM,GAAI74B,OAAO0C,UACjBmzB,EAAOgD,EAAM0B,EACbG,EAAO7E,EAAOrqB,EACdlE,EAAKozB,GAAmB,OAAX9L,EAAmBA,EAASvyB,EAAKgP,cAAcwqB,EAAMwE,EAAWzL,EAAQpjB,GACrFgnB,EAAKkI,GAAiB,OAAT7L,EAAmBA,EAASxyB,EAAKgP,cAAcwqB,EAAMyE,EAASzL,EAAMrjB,EAErFmvB,GAAU9qB,EAAGklB,YAAYztB,EAAGkrB,GAC5Bn1B,EAASi2B,kBAAkBzjB,EAAGwgB,KAAMxgB,EAAG1F,QAAQsmB,aAC/C+J,EAAaA,GAAcG,EACvBA,GACF9qB,EAAGwgB,KAAKE,QAAQjH,KAAK,eAAgBhe,MAAO,GAAItL,MAAK6P,EAAGvE,OAAQC,IAAK,GAAIvL,MAAK6P,EAAGtE,KAAM4uB,OAAOA,IAG5FO,EACEF,GACF3qB,EAAGwgB,KAAKE,QAAQjH,KAAK,gBAAiBhe,MAAO,GAAItL,MAAK6P,EAAGvE,OAAQC,IAAK,GAAIvL,MAAK6P,EAAGtE,KAAM4uB,OAAOA,IAMjGtqB,EAAG4pB,aAAezkB,WAAW+O,EAAM,KAKzC,OAAOA,KAGP,GAAI4W,GAAUj/B,KAAKq5B,YAAYnG,EAAQC,EAEvC,IADAxxB,EAASi2B,kBAAkB53B,KAAK20B,KAAM30B,KAAKyO,QAAQsmB,aAC/CkK,EAAS,CACX,GAAInrB,IAAUlE,MAAO,GAAItL,MAAKtE,KAAK4P,OAAQC,IAAK,GAAIvL,MAAKtE,KAAK6P,KAAM4uB,OAAOA,EAC3Ez+B,MAAK20B,KAAKE,QAAQjH,KAAK,cAAe9Z,GACtC9T,KAAK20B,KAAKE,QAAQjH,KAAK,eAAgB9Z,KAS7CjS,EAAMsR,UAAUurB,iBAAmB,WAC7B1+B,KAAK+9B,eACP1kB,aAAarZ,KAAK+9B,cAClB/9B,KAAK+9B,aAAe,OAaxBl8B,EAAMsR,UAAUkmB,YAAc,SAASzpB,EAAOC,GAC5C,GAIIwc,GAJA6S,EAAqB,MAATtvB,EAAiBjP,EAAKkG,QAAQ+I,EAAO,QAAQ5I,UAAYhH,KAAK4P,MAC1E
uvB,EAAmB,MAAPtvB,EAAiBlP,EAAKkG,QAAQgJ,EAAK,QAAQ7I,UAAchH,KAAK6P,IAC1EnD,EAA2B,MAApB1M,KAAKyO,QAAQ/B,IAAe/L,EAAKkG,QAAQ7G,KAAKyO,QAAQ/B,IAAK,QAAQ1F,UAAY,KACtF8E,EAA2B,MAApB9L,KAAKyO,QAAQ3C,IAAenL,EAAKkG,QAAQ7G,KAAKyO,QAAQ3C,IAAK,QAAQ9E,UAAY,IAI1F,IAAItC,MAAMw6B,IAA0B,OAAbA,EACrB,KAAM,IAAIt7B,OAAM,kBAAoBgM,EAAQ,IAE9C,IAAIlL,MAAMy6B,IAAsB,OAAXA,EACnB,KAAM,IAAIv7B,OAAM,gBAAkBiM,EAAM,IAyC1C,IArCaqvB,EAATC,IACFA,EAASD,GAIC,OAARpzB,GACaA,EAAXozB,IACF7S,EAAQvgB,EAAMozB,EACdA,GAAY7S,EACZ8S,GAAU9S,EAGC,MAAP3f,GACEyyB,EAASzyB,IACXyyB,EAASzyB,IAOL,OAARA,GACEyyB,EAASzyB,IACX2f,EAAQ8S,EAASzyB,EACjBwyB,GAAY7S,EACZ8S,GAAU9S,EAGC,MAAPvgB,GACaA,EAAXozB,IACFA,EAAWpzB,IAOU,OAAzB9L,KAAKyO,QAAQmvB,QAAkB,CACjC,GAAIA,GAAUvY,WAAWrlB,KAAKyO,QAAQmvB,QACxB,GAAVA,IACFA,EAAU,GAEcA,EAArBuB,EAASD,IACPl/B,KAAK6P,IAAM7P,KAAK4P,QAAWguB,GAE9BsB,EAAWl/B,KAAK4P,MAChBuvB,EAASn/B,KAAK6P,MAIdwc,EAAQuR,GAAWuB,EAASD,GAC5BA,GAAY7S,EAAO,EACnB8S,GAAU9S,EAAO,IAMvB,GAA6B,OAAzBrsB,KAAKyO,QAAQovB,QAAkB,CACjC,GAAIA,GAAUxY,WAAWrlB,KAAKyO,QAAQovB,QACxB,GAAVA,IACFA,EAAU,GAEPsB,EAASD,EAAYrB,IACnB79B,KAAK6P,IAAM7P,KAAK4P,QAAWiuB,GAE9BqB,EAAWl/B,KAAK4P,MAChBuvB,EAASn/B,KAAK6P,MAIdwc,EAAS8S,EAASD,EAAYrB,EAC9BqB,GAAY7S,EAAO,EACnB8S,GAAU9S,EAAO,IAKvB,GAAI4S,GAAWj/B,KAAK4P,OAASsvB,GAAYl/B,KAAK6P,KAAOsvB,CAUrD,OAPOD,IAAYl/B,KAAK4P,OAASsvB,GAAcl/B,KAAK6P,KAASsvB,GAAYn/B,KAAK4P,OAASuvB,GAAYn/B,KAAK6P,KACjG7P,KAAK4P,OAASsvB,GAAYl/B,KAAK4P,OAASuvB,GAAcn/B,KAAK6P,KAAOqvB,GAAcl/B,KAAK6P,KAAOsvB,GACjGn/B,KAAK20B,KAAKE,QAAQjH,KAAK,oBAGzB5tB,KAAK4P,MAAQsvB,EACbl/B,KAAK6P,IAAMsvB,EACJF,GAOTp9B,EAAMsR,UAAUisB,SAAW,WACzB,OACExvB,MAAO5P,KAAK4P,MACZC,IAAK7P,KAAK6P,MAUdhO,EAAMsR,UAAUinB,WAAa,SAAU7nB,EAAO8sB,GAC5C,MAAOx9B,GAAMu4B,WAAWp6B,KAAK4P,MAAO5P,KAAK6P,IAAK0C,EAAO8sB,IAWvDx9B,EAAMu4B,WAAa,SAAUxqB,EAAOC,EAAK0C,EAAO8sB,GAI9C,MAHoB74B,UAAhB64B,IACFA,EAAc,GAEH,GAAT9sB,GAAe1C,EAAMD,GAAS,GAE9B+Z,OAAQ/Z,EACRqN,MAAO1K,GAAS1C,EAAMD,EAAQyvB,KAK9B1V,OAAQ,EACR1M,MAAO,IAUbpb,EAAMsR,UAAU6qB,aAAe,SAAS10B,GACtCtJ,KAAKw9B,gBAAkB,EACvBx9B,KAAKs/B,cAAgB,EAEhBt/B,KAAKyO,QAAQivB,UAIb19B,KAAKgG,MAAM83B,MAAMyB,gBAEtBv/B,KAAKgG,MAAM83B,MAAMluB,MAAQ5P,KAAK4P,MAC9B5P,KAAKgG,MAAM83B,MAAMjuB,IAAM7P,KAAK6P,IAC5B7P,KAAKgG,MAAM83B,MAAMiB,UAAW,EAExB/+B,KAAK20B,KAAK5E,IAAIrwB,OAChBM,KAAK20B,KAAK5E,IAAIrwB,KAAKuN,MAAM+f,OAAS,QAGpC1jB,EAAMD,mBAQRxH,EAAMsR,UAAU8qB,QAAU,SAAU30B,GAElC,GAAKtJ,KAAKyO,QAAQivB,UAKb19B,KAAKgG,MAAM83B,MAAMyB,cAAtB,CAEA,GAAIrE,GAAYl7B,KAAKyO,QAAQysB,SAC7BqD,GAAkBrD,EAClB,IAAIzM,GAAsB,cAAbyM,EAA6B5xB,EAAMk2B,OAASl2B,EAAMm2B,MAC/DhR,IAASzuB,KAAKw9B,eACd,IAAIhL,GAAYxyB,KAAKgG,MAAM83B,MAAMjuB,IAAM7P,KAAKgG,MAAM83B,MAAMluB,MAGpDE,EAAWnO,EAAS04B,yBAAyBr6B,KAAK20B,KAAKI,YAAa/0B,KAAK4P,MAAO5P,KAAK6P,IACzF2iB,IAAY1iB,CAEZ,IAAIyC,GAAsB,cAAb2oB,EAA6Bl7B,KAAK20B,KAAKC,SAAS1I,OAAO3Z,MAAQvS,KAAK20B,KAAKC,SAAS1I,OAAO1Z,OAClGktB,GAAajR,EAAQlc,EAAQigB,EAC7B0M,EAAWl/B,KAAKgG,MAAM83B,MAAMluB,MAAQ8vB,EACpCP,EAASn/B,KAAKgG,MAAM83B,MAAMjuB,IAAM6vB,EAGhCC,EAAYh+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAamK,EAAUl/B,KAAKs/B,cAAc7Q,GAAO,GACnGmR,EAAUj+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAaoK,EAAQn/B,KAAKs/B,cAAc7Q,GAAO,EACnG,IAAIkR,GAAaT,GAAYU,GAAWT,EAKtC,MAJAn/B,MAAKw9B,iBAAmB/O,EACxBzuB,KAAKgG,MAAM83B,MAAMluB,MAAQ+vB,EACzB3/B,KAAKgG,MAAM83B,MAAMjuB,IAAM+vB,MACvB5/B,MAAKi+B,QAAQ30B,EAIftJ,MAAKs/B,cAAgB7Q,EACrBzuB,KAAKq5B,YAAY6F,EAAUC,GAG3Bn/B,KAAK20B,KAAKE,QAAQjH,KAAK,eACrBhe,MAAO,GAAItL,MAAKtE,KAAK4P,OACrBC,IAAO,GAAIvL,MAAKtE,KAAK6P,KACrB4uB,QAAQ,IAGVn1B,EAAMD,mBAQRxH,EAAMsR,UAAU+qB,WAAa,WAEtBl+B,KAAKyO,QAAQivB,UAKb19B,KAAKgG,MAAM83B,MAAMyB,gBAEtBv/B,KAAKgG,MAAM83B,MAAMiB,UAAW,EACxB/+B,KAAK20B,KAAK5
E,IAAIrwB,OAChBM,KAAK20B,KAAK5E,IAAIrwB,KAAKuN,MAAM+f,OAAS,QAIpChtB,KAAK20B,KAAKE,QAAQjH,KAAK,gBACrBhe,MAAO,GAAItL,MAAKtE,KAAK4P,OACrBC,IAAO,GAAIvL,MAAKtE,KAAK6P,KACrB4uB,QAAQ,MAUZ58B,EAAMsR,UAAUirB,cAAgB,SAAS90B,GAEvC,GAAMtJ,KAAKyO,QAAQkvB,UAAY39B,KAAKyO,QAAQivB,SAA5C,CAGA,GAAIjP,GAAQ,CAYZ,IAXInlB,EAAMolB,WACRD,EAAQnlB,EAAMolB,WAAa,IAClBplB,EAAMqlB,SAGfF,GAASnlB,EAAMqlB,OAAS,GAMtBF,EAAO,CAKT,GAAIxR,EAEFA,GADU,EAARwR,EACM,EAAKA,EAAQ,EAGb,GAAK,EAAKA,EAAQ,EAI5B,IAAIoR,GAAUrB,GAAYzsB,EAAGzI,EAAMw2B,MAAO9tB,EAAG1I,EAAMy2B,OAAQ//B,KAAK20B,KAAK5E,IAAI7D,QACrE8T,EAAchgC,KAAKigC,eAAeJ,EAEtC7/B,MAAKkgC,KAAKjjB,EAAO+iB,EAAavR,GAKhCnlB,EAAMD,mBAORxH,EAAMsR,UAAUkrB,SAAW,WACzBr+B,KAAKgG,MAAM83B,MAAMluB,MAAQ5P,KAAK4P,MAC9B5P,KAAKgG,MAAM83B,MAAMjuB,IAAM7P,KAAK6P,IAC5B7P,KAAKgG,MAAM83B,MAAMyB,eAAgB,EACjCv/B,KAAKgG,MAAM83B,MAAM5R,OAAS,KAC1BlsB,KAAKy9B,YAAc,EACnBz9B,KAAKw9B,gBAAkB,GAOzB37B,EAAMsR,UAAUgrB,QAAU,WACxBn+B,KAAKgG,MAAM83B,MAAMyB,eAAgB,GAQnC19B,EAAMsR,UAAUmrB,SAAW,SAAUh1B,GAEnC,GAAMtJ,KAAKyO,QAAQkvB,UAAY39B,KAAKyO,QAAQivB,SAA5C,CAEA19B,KAAKgG,MAAM83B,MAAMyB,eAAgB,EAE5Bv/B,KAAKgG,MAAM83B,MAAM5R,SACpBlsB,KAAKgG,MAAM83B,MAAM5R,OAASsS,EAAWl1B,EAAM4iB,OAAQlsB,KAAK20B,KAAK5E,IAAI7D,QAGnE,IAAIjP,GAAQ,GAAK3T,EAAM2T,MAAQjd,KAAKy9B,aAChC0C,EAAangC,KAAKigC,eAAejgC,KAAKgG,MAAM83B,MAAM5R,QAElDqO,EAAiB54B,EAAS04B,yBAAyBr6B,KAAK20B,KAAKI,YAAa/0B,KAAK4P,MAAO5P,KAAK6P,KAC3FuwB,EAAuBz+B,EAASk5B,wBAAwB76B,KAAK20B,KAAKI,YAAa/0B,KAAMmgC,GACrFE,EAAsB9F,EAAiB6F,EAGvClB,EAAYiB,EAAaC,GAAyBpgC,KAAKgG,MAAM83B,MAAMluB,OAASuwB,EAAaC,IAAyBnjB,EAClHkiB,EAAUgB,EAAaE,GAAwBrgC,KAAKgG,MAAM83B,MAAMjuB,KAAOswB,EAAaE,IAAwBpjB,CAGhHjd,MAAKm5B,aAA4B,GAAb,EAAIlc,EACxBjd,KAAKo5B,WAA0B,GAAbnc,EAAQ,CAE1B,IAAI0iB,GAAYh+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAamK,EAAU,EAAIjiB,GAAO,GACpF2iB,EAAUj+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAaoK,EAAQliB,EAAQ,GAAG,IAChF0iB,GAAaT,GAAYU,GAAWT,KACtCn/B,KAAKgG,MAAM83B,MAAMluB,MAAQ+vB,EACzB3/B,KAAKgG,MAAM83B,MAAMjuB,IAAM+vB,EACvB5/B,KAAKy9B,YAAc,EAAIn0B,EAAM2T,MAC7BiiB,EAAWS,EACXR,EAASS,GAGX5/B,KAAKuzB,SAAS2L,EAAUC,GAAQ,GAAO,GAEvCn/B,KAAKm5B,cAAe,EACpBn5B,KAAKo5B,YAAa,EAElB9vB,EAAMD,mBASRxH,EAAMsR,UAAU8sB,eAAiB,SAAUJ,GACzC,GAAIzF,GACAc,EAAYl7B,KAAKyO,QAAQysB,SAI7B,IAFAqD,EAAkBrD,GAED,cAAbA,EACF,MAAOl7B,MAAK20B,KAAKh0B,KAAK00B,OAAOwK,EAAQ9tB,GAAG/K,SAGxC,IAAIwL,GAASxS,KAAK20B,KAAKC,SAAS1I,OAAO1Z,MAEvC,OADA4nB,GAAap6B,KAAKo6B,WAAW5nB,GACtBqtB,EAAQ7tB,EAAIooB,EAAWnd,MAAQmd,EAAWzQ,QA4BrD9nB,EAAMsR,UAAU+sB,KAAO,SAASjjB,EAAOiP,EAAQuC,GAE/B,MAAVvC,IACFA,GAAUlsB,KAAK4P,MAAQ5P,KAAK6P,KAAO,EAGrC,IAAI0qB,GAAiB54B,EAAS04B,yBAAyBr6B,KAAK20B,KAAKI,YAAa/0B,KAAK4P,MAAO5P,KAAK6P,KAC3FuwB,EAAuBz+B,EAASk5B,wBAAwB76B,KAAK20B,KAAKI,YAAa/0B,KAAMksB,GACrFmU,EAAsB9F,EAAiB6F,EAGvClB,EAAYhT,EAAOkU,GAAyBpgC,KAAK4P,OAASsc,EAAOkU,IAAyBnjB,EAC1FkiB,EAAYjT,EAAOmU,GAAwBrgC,KAAK6P,KAAOqc,EAAOmU,IAAwBpjB,CAG1Fjd,MAAKm5B,aAAe1K,EAAQ,GAAI,GAAQ,EACxCzuB,KAAKo5B,YAAc3K,EAAS,GAAI,GAAQ,CACxC,IAAIkR,GAAYh+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAamK,EAAUzQ,GAAO,GAChFmR,EAAUj+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAaoK,GAAS1Q,GAAO,IAC7EkR,GAAaT,GAAYU,GAAWT,KACtCD,EAAWS,EACXR,EAASS,GAGX5/B,KAAKuzB,SAAS2L,EAAUC,GAAQ,GAAO,GAEvCn/B,KAAKm5B,cAAe,EACpBn5B,KAAKo5B,YAAa,GAWpBv3B,EAAMsR,UAAUmtB,KAAO,SAAS7R,GAE9B,GAAIpC,GAAQrsB,KAAK6P,IAAM7P,KAAK4P,MAGxBsvB,EAAWl/B,KAAK4P,MAAQyc,EAAOoC,EAC/B0Q,EAASn/B,KAAK6P,IAAMwc,EAAOoC,CAI/BzuB,MAAK4P,MAAQsvB,EACbl/B,KAAK6P,IAAMsvB,GAObt9B,EAAMsR,UAAU0U,OAAS,SAASA,GAChC,GAAIqE,IAAUlsB,KAAK4P,MAAQ5P,KAAK6P,KAAO,EAEnCwc,EAAOH,EAASrE,EAGhBqX,EAAWl/B,KAAK4P,MAAQyc,EACxB8S,EAASn/B,KAAK6P,IAAMwc,CAExBrsB,MAAKuzB,SAAS2L,EAAUC,IAG1Bt/B,EAAOD
,QAAUiC,GAKb,SAAShC,EAAQD,GAGrB,GAAI2gC,GAAU,IAMd3gC,GAAQ4gC,aAAe,SAASv+B,GAC9BA,EAAMiU,KAAK,SAAU3Q,EAAGa,GACtB,MAAOb,GAAEmN,KAAK9C,MAAQxJ,EAAEsM,KAAK9C,SASjChQ,EAAQ6gC,WAAa,SAASx+B,GAC5BA,EAAMiU,KAAK,SAAU3Q,EAAGa,GACtB,GAAIs6B,GAAS,OAASn7B,GAAEmN,KAAQnN,EAAEmN,KAAK7C,IAAMtK,EAAEmN,KAAK9C,MAChD+wB,EAAS,OAASv6B,GAAEsM,KAAQtM,EAAEsM,KAAK7C,IAAMzJ,EAAEsM,KAAK9C,KAEpD,OAAO8wB,GAAQC,KAenB/gC,EAAQkC,MAAQ,SAASG,EAAOyX,EAAQknB,GACtC,GAAIp7B,GAAGq7B,CAEP,IAAID,EAEF,IAAKp7B,EAAI,EAAGq7B,EAAO5+B,EAAM0D,OAAYk7B,EAAJr7B,EAAUA,IACzCvD,EAAMuD,GAAGmC,IAAM,IAKnB,KAAKnC,EAAI,EAAGq7B,EAAO5+B,EAAM0D,OAAYk7B,EAAJr7B,EAAUA,IAAK,CAC9C,GAAI6J,GAAOpN,EAAMuD,EACjB,IAAI6J,EAAKvN,OAAsB,OAAbuN,EAAK1H,IAAc,CAEnC0H,EAAK1H,IAAM+R,EAAOonB,IAElB,GAAG,CAID,IAAK,GADDC,GAAgB,KACXnV,EAAI,EAAGoV,EAAK/+B,EAAM0D,OAAYq7B,EAAJpV,EAAQA,IAAK,CAC9C,GAAIhmB,GAAQ3D,EAAM2pB,EAClB,IAAkB,OAAdhmB,EAAM+B,KAAgB/B,IAAUyJ,GAAQzJ,EAAM9D,OAASlC,EAAQqhC,UAAU5xB,EAAMzJ,EAAO8T,EAAOrK,MAAO,CACtG0xB,EAAgBn7B,CAChB,QAIiB,MAAjBm7B,IAEF1xB,EAAK1H,IAAMo5B,EAAcp5B,IAAMo5B,EAAcvuB,OAASkH,EAAOrK,KAAKoW,gBAE7Dsb,MAafnhC,EAAQshC,QAAU,SAASj/B,EAAOyX,EAAQynB,GACxC,GAAI37B,GAAGq7B,EAAMO,CAGb,KAAK57B,EAAI,EAAGq7B,EAAO5+B,EAAM0D,OAAYk7B,EAAJr7B,EAAUA,IACzC,GAA+BgB,SAA3BvE,EAAMuD,GAAGkN,KAAK2uB,SAAwB,CACxCD,EAAS1nB,EAAOonB,IAChB,KAAK,GAAIO,KAAYF,GACfA,EAAUr7B,eAAeu7B,IACQ,GAA/BF,EAAUE,GAAU3Y,SAAmByY,EAAUE,GAAUl5B,MAAQg5B,EAAUl/B,EAAMuD,GAAGkN,KAAK2uB,UAAUl5B,QACvGi5B,GAAUD,EAAUE,GAAU7uB,OAASkH,EAAOrK,KAAKoW,SAIzDxjB,GAAMuD,GAAGmC,IAAMy5B,MAGfn/B,GAAMuD,GAAGmC,IAAM+R,EAAOonB,MAe5BlhC,EAAQqhC,UAAY,SAAS17B,EAAGa,EAAGsT,GACjC,MAASnU,GAAEkC,KAAOiS,EAAO8L,WAAa+a,EAAkBn6B,EAAEqB,KAAOrB,EAAEmM,OAC9DhN,EAAEkC,KAAOlC,EAAEgN,MAAQmH,EAAO8L,WAAa+a,EAAWn6B,EAAEqB,MACpDlC,EAAEoC,IAAM+R,EAAO+L,SAAW8a,EAAyBn6B,EAAEuB,IAAMvB,EAAEoM,QAC7DjN,EAAEoC,IAAMpC,EAAEiN,OAASkH,EAAO+L,SAAW8a,EAAan6B,EAAEuB,MAMvD,SAAS9H,EAAQD,EAASM,GAgC9B,QAAS6B,GAAS6N,EAAOC,EAAKurB,EAAarG,GAEzC/0B,KAAK85B,QAAU,GAAIx1B,MACnBtE,KAAKkzB,OAAS,GAAI5uB,MAClBtE,KAAKmzB,KAAO,GAAI7uB,MAEhBtE,KAAKw7B,WAAa,EAClBx7B,KAAKid,MAAQ,MACbjd,KAAKmoB,KAAO,EAGZnoB,KAAKuzB,SAAS3jB,EAAOC,EAAKurB,GAG1Bp7B,KAAKk6B,aAAc,EACnBl6B,KAAKi6B,eAAgB,EACrBj6B,KAAKg6B,cAAe,EACpBh6B,KAAK+0B,YAAcA,EACCvuB,SAAhBuuB,IACF/0B,KAAK+0B,gBAGP/0B,KAAKshC,OAASv/B,EAASw/B,OApDzB,GAAI19B,GAAS3D,EAAoB,IAC7ByB,EAAWzB,EAAoB,IAC/BS,EAAOT,EAAoB,EAsD/B6B,GAASw/B,QACPC,aACEC,YAAY,MACZC,OAAY,IACZC,OAAY,QACZC,KAAY,QACZC,QAAY,QACZxJ,IAAY,IACZK,MAAY,MACZH,KAAY,QAEduJ,aACEL,YAAY,WACZC,OAAY,eACZC,OAAY,aACZC,KAAY,aACZC,QAAY,YACZxJ,IAAY,YACZK,MAAY,OACZH,KAAY,KAUhBx2B,EAASoR,UAAU4uB,UAAY,SAAUT,GACvC,GAAIU,GAAgBrhC,EAAK8F,cAAe1E,EAASw/B,OACjDvhC,MAAKshC,OAAS3gC,EAAK8F,WAAWu7B,EAAeV,IAa/Cv/B,EAASoR,UAAUogB,SAAW,SAAS3jB,EAAOC,EAAKurB,GACjD,KAAMxrB,YAAiBtL,OAAWuL,YAAevL,OAC/C,KAAO,+CAGTtE,MAAKkzB,OAAmB1sB,QAAToJ,EAAsB,GAAItL,MAAKsL,EAAM5I,WAAa,GAAI1C,MACrEtE,KAAKmzB,KAAe3sB,QAAPqJ,EAAoB,GAAIvL,MAAKuL,EAAI7I,WAAa,GAAI1C,MAE3DtE,KAAKw7B,WACPx7B,KAAK+7B,eAAeX,IAOxBr5B,EAASoR,UAAU8uB,MAAQ,WACzBjiC,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAKkzB,OAAOlsB,WACpChH,KAAK08B,gBAOP36B,EAASoR,UAAUupB,aAAe,WAIhC,OAAQ18B,KAAKid,OACX,IAAK,OACHjd,KAAK85B,QAAQoI,YAAYliC,KAAKmoB,KAAOjjB,KAAKC,MAAMnF,KAAK85B,QAAQqI,cAAgBniC,KAAKmoB,OAClFnoB,KAAK85B,QAAQsI,SAAS,EACxB,KAAK,QAAgBpiC,KAAK85B,QAAQuI,QAAQ,EAC1C,KAAK,MACL,IAAK,UAAgBriC,KAAK85B,QAAQwI,SAAS,EAC3C,KAAK,OAAgBtiC,KAAK85B,QAAQyI,WAAW,EAC7C,KAAK,SAAgBviC,KAAK85B,QAAQ0I,WAAW,EAC7C,KAAK,SAAgBxiC,KAAK85B,QAAQ2I,gBAAgB,GAIpD,GAAiB,GAAbziC,KAAKmoB,KAEP,OAAQnoB,KAAKid,OACX,IAAK,cAAgBjd,KAAK85B,QAAQ2I,gBAAgBziC,KAAK85B,QAAQ4I,kBAAoB1iC,KAAK85B,QAAQ4I,kBAAoB1iC,KAA
KmoB,KAAQ,MACjI,KAAK,SAAgBnoB,KAAK85B,QAAQ0I,WAAWxiC,KAAK85B,QAAQ6I,aAAe3iC,KAAK85B,QAAQ6I,aAAe3iC,KAAKmoB,KAAO,MACjH,KAAK,SAAgBnoB,KAAK85B,QAAQyI,WAAWviC,KAAK85B,QAAQ8I,aAAe5iC,KAAK85B,QAAQ8I,aAAe5iC,KAAKmoB,KAAO,MACjH,KAAK,OAAgBnoB,KAAK85B,QAAQwI,SAAStiC,KAAK85B,QAAQ+I,WAAa7iC,KAAK85B,QAAQ+I,WAAa7iC,KAAKmoB,KAAO,MAC3G,KAAK,UACL,IAAK,MAAgBnoB,KAAK85B,QAAQuI,QAASriC,KAAK85B,QAAQgJ,UAAU,GAAM9iC,KAAK85B,QAAQgJ,UAAU,GAAK9iC,KAAKmoB,KAAO,EAAI,MACpH,KAAK,QAAgBnoB,KAAK85B,QAAQsI,SAASpiC,KAAK85B,QAAQiJ,WAAa/iC,KAAK85B,QAAQiJ,WAAa/iC,KAAKmoB,KAAQ,MAC5G,KAAK,OAAgBnoB,KAAK85B,QAAQoI,YAAYliC,KAAK85B,QAAQqI,cAAgBniC,KAAK85B,QAAQqI,cAAgBniC,KAAKmoB,QAUnHpmB,EAASoR,UAAU0pB,QAAU,WAC3B,MAAQ78B,MAAK85B,QAAQ9yB,WAAahH,KAAKmzB,KAAKnsB,WAM9CjF,EAASoR,UAAUkV,KAAO,WACxB,GAAIuJ,GAAO5xB,KAAK85B,QAAQ9yB,SAIxB,IAAIhH,KAAK85B,QAAQiJ,WAAa,EAC5B,OAAQ/iC,KAAKid,OACX,IAAK,cAEHjd,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAYhH,KAAKmoB,KAAO,MAC/D,KAAK,SAAgBnoB,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAwB,IAAZhH,KAAKmoB,KAAc,MACzF,KAAK,SAAgBnoB,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAwB,IAAZhH,KAAKmoB,KAAc,GAAK,MAC9F,KAAK,OACHnoB,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAwB,IAAZhH,KAAKmoB,KAAc,GAAK,GAEzE,IAAIxc,GAAI3L,KAAK85B,QAAQ+I,UACrB7iC,MAAK85B,QAAQwI,SAAS32B,EAAKA,EAAI3L,KAAKmoB,KACpC,MACF,KAAK,UACL,IAAK,MAAgBnoB,KAAK85B,QAAQuI,QAAQriC,KAAK85B,QAAQgJ,UAAY9iC,KAAKmoB,KAAO,MAC/E,KAAK,QAAgBnoB,KAAK85B,QAAQsI,SAASpiC,KAAK85B,QAAQiJ,WAAa/iC,KAAKmoB,KAAO,MACjF,KAAK,OAAgBnoB,KAAK85B,QAAQoI,YAAYliC,KAAK85B,QAAQqI,cAAgBniC,KAAKmoB,UAKlF,QAAQnoB,KAAKid,OACX,IAAK,cAAgBjd,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAYhH,KAAKmoB,KAAO,MAClF,KAAK,SAAgBnoB,KAAK85B,QAAQ0I,WAAWxiC,KAAK85B,QAAQ6I,aAAe3iC,KAAKmoB,KAAO,MACrF,KAAK,SAAgBnoB,KAAK85B,QAAQyI,WAAWviC,KAAK85B,QAAQ8I,aAAe5iC,KAAKmoB,KAAO;KACrF,KAAK,OAAgBnoB,KAAK85B,QAAQwI,SAAStiC,KAAK85B,QAAQ+I,WAAa7iC,KAAKmoB,KAAO,MACjF,KAAK,UACL,IAAK,MAAgBnoB,KAAK85B,QAAQuI,QAAQriC,KAAK85B,QAAQgJ,UAAY9iC,KAAKmoB,KAAO,MAC/E,KAAK,QAAgBnoB,KAAK85B,QAAQsI,SAASpiC,KAAK85B,QAAQiJ,WAAa/iC,KAAKmoB,KAAO,MACjF,KAAK,OAAgBnoB,KAAK85B,QAAQoI,YAAYliC,KAAK85B,QAAQqI,cAAgBniC,KAAKmoB,MAKpF,GAAiB,GAAbnoB,KAAKmoB,KAEP,OAAQnoB,KAAKid,OACX,IAAK,cAAmBjd,KAAK85B,QAAQ4I,kBAAoB1iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQ2I,gBAAgB,EAAK,MACtG,KAAK,SAAmBziC,KAAK85B,QAAQ6I,aAAe3iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQ0I,WAAW,EAAK,MAC5F,KAAK,SAAmBxiC,KAAK85B,QAAQ8I,aAAe5iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQyI,WAAW,EAAK,MAC5F,KAAK,OAAmBviC,KAAK85B,QAAQ+I,WAAa7iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQwI,SAAS,EAAK,MACxF,KAAK,UACL,IAAK,MAAmBtiC,KAAK85B,QAAQgJ,UAAY9iC,KAAKmoB,KAAK,GAAGnoB,KAAK85B,QAAQuI,QAAQ,EAAI,MACvF,KAAK,QAAmBriC,KAAK85B,QAAQiJ,WAAa/iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQsI,SAAS,EAAK,MACxF,KAAK,QAMLpiC,KAAK85B,QAAQ9yB,WAAa4qB,IAC5B5xB,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAKmzB,KAAKnsB,YAGpCrF,EAAS83B,oBAAoBz5B,KAAM4xB,IAQrC7vB,EAASoR,UAAUiV,WAAa,WAC9B,MAAOpoB,MAAK85B,SAcd/3B,EAASoR,UAAU6vB,SAAW,SAASC,EAAUC,GAC/CljC,KAAKid,MAAQgmB,EAETC,EAAU,IACZljC,KAAKmoB,KAAO+a,GAGdljC,KAAKw7B,WAAY,GAOnBz5B,EAASoR,UAAUgwB,aAAe,SAAUC,GAC1CpjC,KAAKw7B,UAAY4H,GAQnBrhC,EAASoR,UAAU4oB,eAAiB,SAASX,GAC3C,GAAmB50B,QAAf40B,EAAJ,CAMA,GAAIiI,GAAiB,QACjBC,EAAiB,OACjBC,EAAiB,MACjBC,EAAiB,KACjBC,EAAiB,IACjBC,EAAiB,IACjBC,EAAiB,CAGR,KAATN,EAAgBjI,IAAqBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,KACpE,IAATkb,EAAejI,IAAsBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,KACpE,IAATkb,EAAejI,IAAsBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,KACpE,GAATkb,EAAcjI,IAAuBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,IACpE,GAATkb,EAAcjI,IAAuBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,IACpE,EAATkb,EAAajI,IAAwBp7B,KAAKid,MAAQ,
OAAejd,KAAKmoB,KAAO,GAC7Ekb,EAAWjI,IAA0Bp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,GACnE,EAAVmb,EAAclI,IAAuBp7B,KAAKid,MAAQ,QAAejd,KAAKmoB,KAAO,GAC7Emb,EAAYlI,IAAyBp7B,KAAKid,MAAQ,QAAejd,KAAKmoB,KAAO,GACrE,EAARob,EAAYnI,IAAyBp7B,KAAKid,MAAQ,MAAejd,KAAKmoB,KAAO,GACrE,EAARob,EAAYnI,IAAyBp7B,KAAKid,MAAQ,MAAejd,KAAKmoB,KAAO,GAC7Eob,EAAUnI,IAA2Bp7B,KAAKid,MAAQ,MAAejd,KAAKmoB,KAAO,GAC7Eob,EAAQ,EAAInI,IAAyBp7B,KAAKid,MAAQ,UAAejd,KAAKmoB,KAAO,GACpE,EAATqb,EAAapI,IAAwBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,GAC7Eqb,EAAWpI,IAA0Bp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,GAClE,GAAXsb,EAAgBrI,IAAqBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,IAClE,GAAXsb,EAAgBrI,IAAqBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,IAClE,EAAXsb,EAAerI,IAAsBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,GAC7Esb,EAAarI,IAAwBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,GAClE,GAAXub,EAAgBtI,IAAqBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,IAClE,GAAXub,EAAgBtI,IAAqBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,IAClE,EAAXub,EAAetI,IAAsBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,GAC7Eub,EAAatI,IAAwBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,GAC7D,IAAhBwb,EAAsBvI,IAAep7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,KAC7D,IAAhBwb,EAAsBvI,IAAep7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,KAC7D,GAAhBwb,EAAqBvI,IAAgBp7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,IAC7D,GAAhBwb,EAAqBvI,IAAgBp7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,IAC7D,EAAhBwb,EAAoBvI,IAAiBp7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,GAC7Ewb,EAAkBvI,IAAmBp7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,KASnFpmB,EAASoR,UAAU6hB,KAAO,SAASyD,GACjC,GAAIL,GAAQ,GAAI9zB,MAAKm0B,EAAKzxB,UAE1B,IAAkB,QAAdhH,KAAKid,MAAiB,CACxB,GAAIsb,GAAOH,EAAM+J,cAAgBj9B,KAAKwoB,MAAM0K,EAAM2K,WAAa,GAC/D3K,GAAM8J,YAAYh9B,KAAKwoB,MAAM6K,EAAOv4B,KAAKmoB,MAAQnoB,KAAKmoB,MACtDiQ,EAAMgK,SAAS,GACfhK,EAAMiK,QAAQ,GACdjK,EAAMkK,SAAS,GACflK,EAAMmK,WAAW,GACjBnK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OAEnB,IAAkB,SAAdziC,KAAKid,MACRmb,EAAM0K,UAAY,IACpB1K,EAAMiK,QAAQ,GACdjK,EAAMgK,SAAShK,EAAM2K,WAAa,IAIlC3K,EAAMiK,QAAQ,GAGhBjK,EAAMkK,SAAS,GACflK,EAAMmK,WAAW,GACjBnK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OAEnB,IAAkB,OAAdziC,KAAKid,MAAgB,CAE5B,OAAQjd,KAAKmoB,MACX,IAAK,GACL,IAAK,GACHiQ,EAAMkK,SAA6C,GAApCp9B,KAAKwoB,MAAM0K,EAAMyK,WAAa,IAAW,MAC1D,SACEzK,EAAMkK,SAA6C,GAApCp9B,KAAKwoB,MAAM0K,EAAMyK,WAAa,KAEjDzK,EAAMmK,WAAW,GACjBnK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OAEnB,IAAkB,WAAdziC,KAAKid,MAAoB,CAEhC,OAAQjd,KAAKmoB,MACX,IAAK,GACL,IAAK,GACHiQ,EAAMkK,SAA6C,GAApCp9B,KAAKwoB,MAAM0K,EAAMyK,WAAa,IAAW,MAC1D,SACEzK,EAAMkK,SAA4C,EAAnCp9B,KAAKwoB,MAAM0K,EAAMyK,WAAa,IAEjDzK,EAAMmK,WAAW,GACjBnK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OAEnB,IAAkB,QAAdziC,KAAKid,MAAiB,CAC7B,OAAQjd,KAAKmoB,MACX,IAAK,GACHiQ,EAAMmK,WAAiD,GAAtCr9B,KAAKwoB,MAAM0K,EAAMwK,aAAe,IAAW,MAC9D,SACExK,EAAMmK,WAAiD,GAAtCr9B,KAAKwoB,MAAM0K,EAAMwK,aAAe,KAErDxK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OACjB,IAAkB,UAAdziC,KAAKid,MAAmB,CAEjC,OAAQjd,KAAKmoB,MACX,IAAK,IACL,IAAK,IACHiQ,EAAMmK,WAAgD,EAArCr9B,KAAKwoB,MAAM0K,EAAMwK,aAAe,IACjDxK,EAAMoK,WAAW,EACjB,MACF,KAAK,GACHpK,EAAMoK,WAAiD,GAAtCt9B,KAAKwoB,MAAM0K,EAAMuK,aAAe,IAAW,MAC9D,SACEvK,EAAMoK,WAAiD,GAAtCt9B,KAAKwoB,MAAM0K,EAAMuK,aAAe,KAErDvK,EAAMqK,gBAAgB,OAEnB,IAAkB,UAAdziC,KAAKid,MAEZ,OAAQjd,KAAKmoB,MACX,IAAK,IACL,IAAK,IACHiQ,EAAMoK,WAAgD,EAArCt9B,KAAKwoB,MAAM0K,EAAMuK,aAAe,IACjDvK,EAAMqK,gBAAgB,EACtB,MACF,KAAK,GACHrK,EAAMqK,gBAA6D,IAA7Cv9B,KAAKwoB,MAAM0K,EAAMsK,kBAAoB,KAAe,MAC5E,SACEtK,EAAMqK,gBAA4D,IAA5Cv9B,KAAKwoB,MAAM0K,EAAMsK,kBAAoB,UAG5D,IAAkB,eAAd1iC,KAAKid,MAAwB,CACpC,GAAIkL,GAAOnoB,KAAKmoB,KAAO,EAAInoB,KAAKmoB,KAAO,EAAI,CAC3CiQ,GAAMqK,gBAAgBv9B,KAAKwoB,MAAM0K,EAAMsK,kBAAoBva,GAAQA,GAGrE,MAAOiQ,IAQTr2B,EAASoR,UAAU+pB,QAAU,WAC3B,GAAyB,GAArBl9B,KAAKg6B,aAEP,OADAh6B,KAAKg6B,
cAAe,EACZh6B,KAAKid,OACX,IAAK,OACL,IAAK,QACL,IAAK,UACL,IAAK,MACL,IAAK,OACL,IAAK,SACL,IAAK,SACL,IAAK,cACH,OAAO,CACT,SACE,OAAO,MAGR,IAA0B,GAAtBjd,KAAKi6B,cAEZ,OADAj6B,KAAKi6B,eAAgB,EACbj6B,KAAKid,OACX,IAAK,UACL,IAAK,MACL,IAAK,OACL,IAAK,SACL,IAAK,SACL,IAAK,cACH,OAAO,CACT,SACE,OAAO,MAGR,IAAwB,GAApBjd,KAAKk6B,YAEZ,OADAl6B,KAAKk6B,aAAc,EACXl6B,KAAKid,OACX,IAAK,cACL,IAAK,SACL,IAAK,SACL,IAAK,OACH,OAAO,CACT,SACE,OAAO,EAIb,OAAQjd,KAAKid,OACX,IAAK,cACH,MAA0C,IAAlCjd,KAAK85B,QAAQ4I,iBACvB,KAAK,SACH,MAAqC,IAA7B1iC,KAAK85B,QAAQ6I,YACvB,KAAK,SACH,MAAmC,IAA3B3iC,KAAK85B,QAAQ+I,YAAkD,GAA7B7iC,KAAK85B,QAAQ8I,YACzD,KAAK,OACH,MAAmC,IAA3B5iC,KAAK85B,QAAQ+I,UACvB,KAAK,UACL,IAAK,MACH,MAAkC,IAA1B7iC,KAAK85B,QAAQgJ,SACvB,KAAK,QACH,MAAmC,IAA3B9iC,KAAK85B,QAAQiJ,UACvB,KAAK,OACH,OAAO,CACT,SACE,OAAO,IAWbhhC,EAASoR,UAAUywB,cAAgB,SAASnL,GAC9BjyB,QAARiyB,IACFA,EAAOz4B,KAAK85B,QAGd,IAAIwH,GAASthC,KAAKshC,OAAOE,YAAYxhC,KAAKid,MAC1C,OAAQqkB,IAAUA,EAAO37B,OAAS,EAAK9B,EAAO40B,GAAM6I,OAAOA,GAAU,IASvEv/B,EAASoR,UAAU0wB,cAAgB,SAASpL,GAC9BjyB,QAARiyB,IACFA,EAAOz4B,KAAK85B,QAGd,IAAIwH,GAASthC,KAAKshC,OAAOQ,YAAY9hC,KAAKid,MAC1C,OAAQqkB,IAAUA,EAAO37B,OAAS,EAAK9B,EAAO40B,GAAM6I,OAAOA,GAAU,IAGvEv/B,EAASoR,UAAU2wB,aAAe,WAKhC,QAASC,GAAK18B,GACZ,MAAQA,GAAQ8gB,EAAO,GAAK,EAAK,QAAU,OAG7C,QAAS6b,GAAMvL,GACb,MAAIA,GAAKwL,OAAO,GAAI3/B,MAAQ,OACnB,SAELm0B,EAAKwL,OAAOpgC,IAASoP,IAAI,EAAG,OAAQ,OAC/B,YAELwlB,EAAKwL,OAAOpgC,IAASoP,IAAI,GAAI,OAAQ,OAChC,aAEF,GAGT,QAASixB,GAAYzL,GACnB,MAAOA,GAAKwL,OAAO,GAAI3/B,MAAQ,QAAU,gBAAkB,GAG7D,QAAS6/B,GAAa1L,GACpB,MAAOA,GAAKwL,OAAO,GAAI3/B,MAAQ,SAAW,iBAAmB,GAG/D,QAAS8/B,GAAY3L,GACnB,MAAOA,GAAKwL,OAAO,GAAI3/B,MAAQ,QAAU,gBAAkB,GA9B7D,GAAI9D,GAAIqD,EAAO7D,KAAK85B,SAChBrB,EAAOj4B,EAAE6jC,OAAS7jC,EAAE6jC,OAAO,MAAQ7jC,EAAE8jC,KAAK,MAC1Cnc,EAAOnoB,KAAKmoB,IA+BhB,QAAQnoB,KAAKid,OACX,IAAK,cACH,MAAO8mB,GAAKtL,EAAK8E,gBAAgBrwB,MAEnC,KAAK,SACH,MAAO62B,GAAKtL,EAAK6E,WAAWpwB,MAE9B,KAAK,SACH,MAAO62B,GAAKtL,EAAK4E,WAAWnwB,MAE9B,KAAK,OACH,GAAIkwB,GAAQ3E,EAAK2E,OAIjB,OAHiB,IAAbp9B,KAAKmoB,OACPiV,EAAQA,EAAQ,KAAOA,EAAQ,IAE1BA,EAAQ,IAAM4G,EAAMvL,GAAQsL,EAAKtL,EAAK2E,QAE/C,KAAK,UACH,MAAO3E,GAAK6I,OAAO,QAAQiD,cACvBP,EAAMvL,GAAQyL,EAAYzL,GAAQsL,EAAKtL,EAAKA,OAElD,KAAK,MACH,GAAIJ,GAAMI,EAAKA,OACXC,EAAQD,EAAK6I,OAAO,QAAQiD,aAChC,OAAO,MAAQlM,EAAM,IAAMK,EAAQyL,EAAa1L,GAAQsL,EAAK1L,EAAM,EAErE,KAAK,QACH,MAAOI,GAAK6I,OAAO,QAAQiD,cACvBJ,EAAa1L,GAAQsL,EAAKtL,EAAKC,QAErC,KAAK,OACH,GAAIH,GAAOE,EAAKF,MAChB,OAAO,OAASA,EAAO6L,EAAY3L,GAAOsL,EAAKxL,EAEjD,SACE,MAAO,KAIb14B,EAAOD,QAAUmC,GAKb,SAASlC,GAOb,QAAS0C,KACPvC,KAAKyO,QAAU,KACfzO,KAAKgG,MAAQ,KAQfzD,EAAU4Q,UAAUD,WAAa,SAASzE,GACpCA,GACF9N,KAAK2E,OAAOtF,KAAKyO,QAASA,IAQ9BlM,EAAU4Q,UAAUsO,OAAS,WAE3B,OAAO,GAMTlf,EAAU4Q,UAAUG,QAAU,aAU9B/Q,EAAU4Q,UAAUqxB,WAAa,WAC/B,GAAIC,GAAWzkC,KAAKgG,MAAM0+B,iBAAmB1kC,KAAKgG,MAAMuM,OACpDvS,KAAKgG,MAAM2+B,kBAAoB3kC,KAAKgG,MAAMwM,MAK9C,OAHAxS,MAAKgG,MAAM0+B,eAAiB1kC,KAAKgG,MAAMuM,MACvCvS,KAAKgG,MAAM2+B,gBAAkB3kC,KAAKgG,MAAMwM,OAEjCiyB,GAGT5kC,EAAOD,QAAU2C,GAKb,SAAS1C,EAAQD,EAASM,GAe9B,QAASsC,GAAamyB,EAAMlmB,GAC1BzO,KAAK20B,KAAOA,EAGZ30B,KAAKq0B,gBACHuQ,iBAAiB,EAEjBC,QAASA,EACTR,OAAQ,MAEVrkC,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBACpCr0B,KAAK2pB,OAAS,EAEd3pB,KAAK00B,UAEL10B,KAAKkT,WAAWzE,GA5BlB,GAAI9N,GAAOT,EAAoB,GAC3BqC,EAAYrC,EAAoB,IAChC2D,EAAS3D,EAAoB,IAC7B2kC,EAAU3kC,EAAoB,GA4BlCsC,GAAY2Q,UAAY,GAAI5Q,GAM5BC,EAAY2Q,UAAUuhB,QAAU,WAC9B,GAAI7C,GAAMtgB,SAASM,cAAc,MACjCggB,GAAIhqB,UAAY,cAChBgqB,EAAI5kB,MAAM2W,SAAW,WACrBiO,EAAI5kB,MAAMtF,IAAM,MAChBkqB,EAAI5kB,MAAMuF,OAAS,OAEnBxS,KAAK6xB,IAAMA,GAMbrvB,EAAY2Q,UAAUG,QAAU,WAC9BtT,KAAKyO,QAAQm2B,iBAAkB,EAC/B5kC,KAAKyhB,SAELzhB,KAAK2
0B,KAAO,MAQdnyB,EAAY2Q,UAAUD,WAAa,SAASzE,GACtCA,GAEF9N,EAAKoF,iBAAiB,kBAAmB,SAAU,WAAY/F,KAAKyO,QAASA,IAQjFjM,EAAY2Q,UAAUsO,OAAS,WAC7B,GAAIzhB,KAAKyO,QAAQm2B,gBAAiB,CAChC,GAAIE,GAAS9kC,KAAK20B,KAAK5E,IAAIgV,kBACvB/kC,MAAK6xB,IAAIhoB,YAAci7B,IAErB9kC,KAAK6xB,IAAIhoB,YACX7J,KAAK6xB,IAAIhoB,WAAWsH,YAAYnR,KAAK6xB,KAEvCiT,EAAOrzB,YAAYzR,KAAK6xB,KAExB7xB,KAAK4P,QAGP,IAAIutB,GAAM,GAAI74B,OAAK,GAAIA,OAAO0C,UAAYhH,KAAK2pB,QAC3C5X,EAAI/R,KAAK20B,KAAKh0B,KAAKs0B,SAASkI,GAE5BkH,EAASrkC,KAAKyO,QAAQo2B,QAAQ7kC,KAAKyO,QAAQ41B,QAC3CW,EAAQX,EAAOvK,QAAU,IAAMuK,EAAOlK,KAAO,KAAOt2B,EAAOs5B,GAAKmE,OAAO,8BAC3E0D,GAAQA,EAAM5f,OAAO,GAAG6f,cAAgBD,EAAME,UAAU,GAExDllC,KAAK6xB,IAAI5kB,MAAMxF,KAAOsK,EAAI,KAC1B/R,KAAK6xB,IAAImT,MAAQA,MAIbhlC,MAAK6xB,IAAIhoB,YACX7J,KAAK6xB,IAAIhoB,WAAWsH,YAAYnR,KAAK6xB,KAEvC7xB,KAAKklB,MAGP,QAAO,GAMT1iB,EAAY2Q,UAAUvD,MAAQ,WAG5B,QAASiF,KACPV,EAAG+Q,MAGH,IAAIjI,GAAQ9I,EAAGwgB,KAAKc,MAAM2E,WAAWjmB,EAAGwgB,KAAKC,SAAS1I,OAAO3Z,OAAO0K,MAChEuV,EAAW,EAAIvV,EAAQ,EACZ,IAAXuV,IAAiBA,EAAW,IAC5BA,EAAW,MAAMA,EAAW,KAEhCre,EAAGsN,SAGHtN,EAAGgxB,iBAAmB7rB,WAAWzE,EAAQ2d,GAd3C,GAAIre,GAAKnU,IAiBT6U,MAMFrS,EAAY2Q,UAAU+R,KAAO,WACG1e,SAA1BxG,KAAKmlC,mBACP9rB,aAAarZ,KAAKmlC,wBACXnlC,MAAKmlC,mBAUhB3iC,EAAY2Q,UAAUiyB,eAAiB,SAASjL,GAC9C,GAAIrsB,GAAInN,EAAKkG,QAAQszB,EAAM,QAAQnzB,UAC/Bm2B,GAAM,GAAI74B,OAAO0C,SACrBhH,MAAK2pB,OAAS7b,EAAIqvB,EAClBn9B,KAAKyhB,UAOPjf,EAAY2Q,UAAUkyB,eAAiB,WACrC,MAAO,IAAI/gC,OAAK,GAAIA,OAAO0C,UAAYhH,KAAK2pB,SAG9C9pB,EAAOD,QAAU4C,GAKb,SAAS3C,EAAQD,EAASM,GAiB9B,QAASuC,GAAYkyB,EAAMlmB,GACzBzO,KAAK20B,KAAOA,EAGZ30B,KAAKq0B,gBACHiR,gBAAgB,EAChBT,QAASA,EACTR,OAAQ,MAEVrkC,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBAEpCr0B,KAAK41B,WAAa,GAAItxB,MACtBtE,KAAKulC,eAGLvlC,KAAK00B,UAEL10B,KAAKkT,WAAWzE,GAhClB,GAAI1K,GAAS7D,EAAoB,IAC7BS,EAAOT,EAAoB,GAC3BqC,EAAYrC,EAAoB,IAChC2D,EAAS3D,EAAoB,IAC7B2kC,EAAU3kC,EAAoB,GA+BlCuC,GAAW0Q,UAAY,GAAI5Q,GAO3BE,EAAW0Q,UAAUD,WAAa,SAASzE,GACrCA,GAEF9N,EAAKoF,iBAAiB,iBAAkB,SAAU,WAAY/F,KAAKyO,QAASA,IAQhFhM,EAAW0Q,UAAUuhB,QAAU,WAC7B,GAAI7C,GAAMtgB,SAASM,cAAc,MACjCggB,GAAIhqB,UAAY,aAChBgqB,EAAI5kB,MAAM2W,SAAW,WACrBiO,EAAI5kB,MAAMtF,IAAM,MAChBkqB,EAAI5kB,MAAMuF,OAAS,OACnBxS,KAAK6xB,IAAMA,CAEX,IAAI2T,GAAOj0B,SAASM,cAAc,MAClC2zB,GAAKv4B,MAAM2W,SAAW,WACtB4hB,EAAKv4B,MAAMtF,IAAM,MACjB69B,EAAKv4B,MAAMxF,KAAO,QAClB+9B,EAAKv4B,MAAMuF,OAAS,OACpBgzB,EAAKv4B,MAAMsF,MAAQ,OACnBsf,EAAIpgB,YAAY+zB,GAGhBxlC,KAAK8D,OAAS,GAAIC,GAAOyhC,GACzBxlC,KAAK8D,OAAOyP,GAAG,WAAYvT,KAAKg+B,aAAalJ,KAAK90B,OAClDA,KAAK8D,OAAOyP,GAAG,UAAYvT,KAAKi+B,QAAQnJ,KAAK90B,OAC7CA,KAAK8D,OAAOyP,GAAG,SAAYvT,KAAKk+B,WAAWpJ,KAAK90B,OAChDA,KAAK8D,OAAOyP,GAAG,MAAS,SAAUjK,GAChCA,EAAMD,oBAOV5G,EAAW0Q,UAAUG,QAAU,WAC7BtT,KAAKyO,QAAQ62B,gBAAiB,EAC9BtlC,KAAKyhB,SAELzhB,KAAK8D,OAAOs/B,QAAO,GACnBpjC,KAAK8D,OAAS,KAEd9D,KAAK20B,KAAO,MAOdlyB,EAAW0Q,UAAUsO,OAAS,WAC5B,GAAIzhB,KAAKyO,QAAQ62B,eAAgB,CAC/B,GAAIR,GAAS9kC,KAAK20B,KAAK5E,IAAIgV,kBACvB/kC,MAAK6xB,IAAIhoB,YAAci7B,IAErB9kC,KAAK6xB,IAAIhoB,YACX7J,KAAK6xB,IAAIhoB,WAAWsH,YAAYnR,KAAK6xB,KAEvCiT,EAAOrzB,YAAYzR,KAAK6xB,KAG1B,IAAI9f,GAAI/R,KAAK20B,KAAKh0B,KAAKs0B,SAASj1B,KAAK41B,YAEjCyO,EAASrkC,KAAKyO,QAAQo2B,QAAQ7kC,KAAKyO,QAAQ41B,QAC3CW,EAAQX,EAAOlK,KAAO,KAAOt2B,EAAO7D,KAAK41B,YAAY0L,OAAO,8BAChE0D,GAAQA,EAAM5f,OAAO,GAAG6f,cAAgBD,EAAME,UAAU,GAExDllC,KAAK6xB,IAAI5kB,MAAMxF,KAAOsK,EAAI,KAC1B/R,KAAK6xB,IAAImT,MAAQA,MAIbhlC,MAAK6xB,IAAIhoB,YACX7J,KAAK6xB,IAAIhoB,WAAWsH,YAAYnR,KAAK6xB,IAIzC,QAAO,GAOTpvB,EAAW0Q,UAAUsyB,cAAgB,SAAStL,GAC5Cn6B,KAAK41B,WAAaj1B,EAAKkG,QAAQszB,EAAM,QACrCn6B,KAAKyhB,UAOPhf,EAAW0Q,UAAUuyB,cAAgB,WACnC,MAAO,IAAIphC,MAAKtE,KAAK41B,WAAW5uB,YAQlCvE,EAAW0Q,UAAU6qB,aAAe,SAAS10B
,GAC3CtJ,KAAKulC,YAAYxG,UAAW,EAC5B/+B,KAAKulC,YAAY3P,WAAa51B,KAAK41B,WAEnCtsB,EAAMq8B,kBACNr8B,EAAMD,kBAQR5G,EAAW0Q,UAAU8qB,QAAU,SAAU30B,GACvC,GAAKtJ,KAAKulC,YAAYxG,SAAtB,CAEA,GAAIhtB,GAAI/R,KAAK20B,KAAKh0B,KAAKs0B,SAASj1B,KAAKulC,YAAY3P,YAActsB,EAAMk2B,OACjErF,EAAOn6B,KAAK20B,KAAKh0B,KAAK00B,OAAOtjB,EAEjC/R,MAAKylC,cAActL,GAGnBn6B,KAAK20B,KAAKE,QAAQjH,KAAK,cACrBuM,KAAM,GAAI71B,MAAKtE,KAAK41B,WAAW5uB,aAGjCsC,EAAMq8B,kBACNr8B,EAAMD,mBAQR5G,EAAW0Q,UAAU+qB,WAAa,SAAU50B,GACrCtJ,KAAKulC,YAAYxG,WAGtB/+B,KAAK20B,KAAKE,QAAQjH,KAAK,eACrBuM,KAAM,GAAI71B,MAAKtE,KAAK41B,WAAW5uB,aAGjCsC,EAAMq8B,kBACNr8B,EAAMD,mBAGRxJ,EAAOD,QAAU6C,GAKb,SAAS5C,EAAQD,EAASM,GAe9B,QAASwC,GAAUiyB,EAAMlmB,EAASm3B,EAAKC,GACrC7lC,KAAKK,GAAKM,EAAKqE,aACfhF,KAAK20B,KAAOA,EAEZ30B,KAAKq0B,gBACHE,YAAa,OACbuR,iBAAiB,EACjBC,iBAAiB,EACjBC,OAAO,EACPC,iBAAkB,EAClBC,iBAAkB,EAClBC,aAAc,GACdC,aAAc,EACdC,UAAW,GACX9zB,MAAO,OACPmW,SAAS,EACT6S,YAAY,EACZD,aACE7zB,MAAOqE,IAAItF,OAAWkG,IAAIlG,QAC1B6gB,OAAQvb,IAAItF,OAAWkG,IAAIlG,SAE7Bw+B,OACEv9B,MAAO8hB,KAAK/iB,QACZ6gB,OAAQkC,KAAK/iB,SAEf86B,QACE75B,MAAOs1B,SAAUv2B,QACjB6gB,OAAQ0V,SAAUv2B,UAItBxG,KAAK6lC,iBAAmBA,EACxB7lC,KAAKsmC,aAAeV,EACpB5lC,KAAKgG,SACLhG,KAAKumC,aACHC,SACAC,UACAzB,UAGFhlC,KAAK+vB,OAEL/vB,KAAKy1B,OAAS7lB,MAAM,EAAGC,IAAI,GAE3B7P,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBACpCr0B,KAAK0mC,iBAAmB,EAExB1mC,KAAKkT,WAAWzE,GAChBzO,KAAKuS,MAAQrO,QAAQ,GAAKlE,KAAKyO,QAAQ8D,OAAO/H,QAAQ,KAAK,KAC3DxK,KAAK2mC,SAAW3mC,KAAKuS,MACrBvS,KAAKwS,OAASxS,KAAKsmC,aAAahW,aAChCtwB,KAAKk5B,QAAS,EAEdl5B,KAAK4mC,WAAa,GAClB5mC,KAAK6mC,iBAAmB,GACxB7mC,KAAK8mC,aAAe,GAEpB9mC,KAAK+mC,WAAa,EAClB/mC,KAAKgnC,QAAS,EACdhnC,KAAKinC,eACLjnC,KAAKknC,cAAe,EAGpBlnC,KAAKm0B,UACLn0B,KAAKmnC,eAAiB,EAGtBnnC,KAAK00B,SAEL,IAAIvgB,GAAKnU,IACTA,MAAK20B,KAAKE,QAAQthB,GAAG,eAAgB,WACnCY,EAAG4b,IAAIqX,cAAcn6B,MAAMtF,IAAMwM,EAAGwgB,KAAKC,SAASyS,UAAY,OApFlE,GAAI1mC,GAAOT,EAAoB,GAC3BU,EAAUV,EAAoB,GAC9BqC,EAAYrC,EAAoB,IAChC0B,EAAW1B,EAAoB,GAqFnCwC,GAASyQ,UAAY,GAAI5Q,GAGzBG,EAASyQ,UAAUm0B,SAAW,SAAS7e,EAAO8e,GACvCvnC,KAAKm0B,OAAOruB,eAAe2iB,KAC9BzoB,KAAKm0B,OAAO1L,GAAS8e,GAEvBvnC,KAAKmnC,gBAAkB,GAGzBzkC,EAASyQ,UAAUq0B,YAAc,SAAS/e,EAAO8e,GAC/CvnC,KAAKm0B,OAAO1L,GAAS8e,GAGvB7kC,EAASyQ,UAAUs0B,YAAc,SAAShf,GACpCzoB,KAAKm0B,OAAOruB,eAAe2iB,WACtBzoB,MAAKm0B,OAAO1L,GACnBzoB,KAAKmnC,gBAAkB,IAK3BzkC,EAASyQ,UAAUD,WAAa,SAAUzE,GACxC,GAAIA,EAAS,CACX,GAAIgT,IAAS,CACTzhB,MAAKyO,QAAQ8lB,aAAe9lB,EAAQ8lB,aAAuC/tB,SAAxBiI,EAAQ8lB,cAC7D9S,GAAS,EAEX,IAAIvT,IACF,cACA,kBACA,kBACA,QACA,mBACA,mBACA,eACA,eACA,YACA,QACA,UACA,cACA,QACA,SACA,aAEFvN,GAAKoF,gBAAgBmI,EAAQlO,KAAKyO,QAASA,GAE3CzO,KAAK2mC,SAAWziC,QAAQ,GAAKlE,KAAKyO,QAAQ8D,OAAO/H,QAAQ,KAAK,KAEhD,GAAViX,GAAkBzhB,KAAK+vB,IAAIzQ,QAC7Btf,KAAK0nC,OACL1nC,KAAK2nC,UASXjlC,EAASyQ,UAAUuhB,QAAU,WAC3B10B,KAAK+vB,IAAIzQ,MAAQ/N,SAASM,cAAc,OACxC7R,KAAK+vB,IAAIzQ,MAAMrS,MAAMsF,MAAQvS,KAAKyO,QAAQ8D,MAC1CvS,KAAK+vB,IAAIzQ,MAAMrS,MAAMuF,OAASxS,KAAKwS,OAEnCxS,KAAK+vB,IAAIqX,cAAgB71B,SAASM,cAAc,OAChD7R,KAAK+vB,IAAIqX,cAAcn6B,MAAMsF,MAAQ,OACrCvS,KAAK+vB,IAAIqX,cAAcn6B,MAAMuF,OAASxS,KAAKwS,OAC3CxS,KAAK+vB,IAAIqX,cAAcn6B,MAAM2W,SAAW,WAGxC5jB,KAAK4lC,IAAMr0B,SAASC,gBAAgB,6BAA6B,OACjExR,KAAK4lC,IAAI34B,MAAM2W,SAAW,WAC1B5jB,KAAK4lC,IAAI34B,MAAMtF,IAAM,MACrB3H,KAAK4lC,IAAI34B,MAAMuF,OAAS,OACxBxS,KAAK4lC,IAAI34B,MAAMsF,MAAQ,OACvBvS,KAAK4lC,IAAI34B,MAAM26B,QAAU,QACzB5nC,KAAK+vB,IAAIzQ,MAAM7N,YAAYzR,KAAK4lC,MAGlCljC,EAASyQ,UAAU00B,kBAAoB,WACrCjnC,EAAQiQ,gBAAgB7Q,KAAKinC,YAE7B,IAAIl1B,GACAs0B,EAAYrmC,KAAKyO,QAAQ43B,UACzByB,EAAa,GACbC,EAAa,EACb/1B,EAAI+1B,EAAa,GAAMD,CAGzB/1B,GAD8B,QAA5B/R,KAAKyO,QAAQ8lB,YACXwT,EAGA/nC,KAAKuS,MAAQ8zB,EAAY0B,CAG/B,
KAAK,GAAIzQ,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,KACvIt3B,KAAKm0B,OAAOmD,GAAS0Q,SAASj2B,EAAGC,EAAGhS,KAAKinC,YAAajnC,KAAK4lC,IAAKS,EAAWyB,GAC3E91B,GAAK81B,EAAaC,GAKxBnnC,GAAQsQ,gBAAgBlR,KAAKinC,aAC7BjnC,KAAKknC,cAAe,GAGtBxkC,EAASyQ,UAAU80B,cAAgB,WACR,GAArBjoC,KAAKknC,eACPtmC,EAAQiQ,gBAAgB7Q,KAAKinC,aAC7BrmC,EAAQsQ,gBAAgBlR,KAAKinC,aAC7BjnC,KAAKknC,cAAe,IAOxBxkC,EAASyQ,UAAUw0B,KAAO,WACxB3nC,KAAKk5B,QAAS,EACTl5B,KAAK+vB,IAAIzQ,MAAMzV,aACc,QAA5B7J,KAAKyO,QAAQ8lB,YACfv0B,KAAK20B,KAAK5E,IAAItoB,KAAKgK,YAAYzR,KAAK+vB,IAAIzQ,OAGxCtf,KAAK20B,KAAK5E,IAAI1I,MAAM5V,YAAYzR,KAAK+vB,IAAIzQ,QAIxCtf,KAAK+vB,IAAIqX,cAAcv9B,YAC1B7J,KAAK20B,KAAK5E,IAAImY,qBAAqBz2B,YAAYzR,KAAK+vB,IAAIqX,gBAO5D1kC,EAASyQ,UAAUu0B,KAAO,WACxB1nC,KAAKk5B,QAAS,EACVl5B,KAAK+vB,IAAIzQ,MAAMzV,YACjB7J,KAAK+vB,IAAIzQ,MAAMzV,WAAWsH,YAAYnR,KAAK+vB,IAAIzQ,OAG7Ctf,KAAK+vB,IAAIqX,cAAcv9B,YACzB7J,KAAK+vB,IAAIqX,cAAcv9B,WAAWsH,YAAYnR,KAAK+vB,IAAIqX,gBAU3D1kC,EAASyQ,UAAUogB,SAAW,SAAU3jB,EAAOC,GAC1B,GAAf7P,KAAKgnC,QAA8C,GAA3BhnC,KAAKyO,QAAQ8sB,YAA2C,IAArBv7B,KAAK8mC,cAC9Dl3B,EAAQ,IACVA,EAAQ,GAGZ5P,KAAKy1B,MAAM7lB,MAAQA,EACnB5P,KAAKy1B,MAAM5lB,IAAMA,GAOnBnN,EAASyQ,UAAUsO,OAAS,WAC1B,GAAIgjB,IAAU,EACV0D,EAAe,CAGnBnoC,MAAK+vB,IAAIqX,cAAcn6B,MAAMtF,IAAM3H,KAAK20B,KAAKC,SAASyS,UAAY,IAElE,KAAK,GAAI/P,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,IACvI6Q,IAIN,IAA2B,GAAvBnoC,KAAKmnC,gBAAuC,GAAhBgB,EAC9BnoC,KAAK0nC,WAEF,CACH1nC,KAAK2nC,OACL3nC,KAAKwS,OAAStO,OAAOlE,KAAKsmC,aAAar5B,MAAMuF,OAAOhI,QAAQ,KAAK,KAGjExK,KAAK+vB,IAAIqX,cAAcn6B,MAAMuF,OAASxS,KAAKwS,OAAS,KACpDxS,KAAKuS,MAAgC,GAAxBvS,KAAKyO,QAAQia,QAAkBxkB,QAAQ,GAAKlE,KAAKyO,QAAQ8D,OAAO/H,QAAQ,KAAK,KAAO,CAEjG,IAAIxE,GAAQhG,KAAKgG,MACbsZ,EAAQtf,KAAK+vB,IAAIzQ,KAGrBA,GAAMzX,UAAY,WAGlB7H,KAAKooC,oBAEL,IAAI7T,GAAcv0B,KAAKyO,QAAQ8lB,YAC3BuR,EAAkB9lC,KAAKyO,QAAQq3B,gBAC/BC,EAAkB/lC,KAAKyO,QAAQs3B,eAGnC//B,GAAMqiC,iBAAmBvC,EAAkB9/B,EAAMsiC,gBAAkB,EACnEtiC,EAAMuiC,iBAAmBxC,EAAkB//B,EAAMwiC,gBAAkB,EAEnExiC,EAAMyiC,eAAiBzoC,KAAK20B,KAAK5E,IAAImY,qBAAqB9X,YAAcpwB,KAAK+mC,WAAa/mC,KAAKuS,MAAQ,EAAIvS,KAAKyO,QAAQy3B,iBACxHlgC,EAAM0iC,gBAAkB,EACxB1iC,EAAM2iC,eAAiB3oC,KAAK20B,KAAK5E,IAAImY,qBAAqB9X,YAAcpwB,KAAK+mC,WAAa/mC,KAAKuS,MAAQ,EAAIvS,KAAKyO,QAAQw3B,iBACxHjgC,EAAM4iC,gBAAkB,EAGL,QAAfrU,GACFjV,EAAMrS,MAAMtF,IAAM,IAClB2X,EAAMrS,MAAMxF,KAAO,IACnB6X,EAAMrS,MAAMqW,OAAS,GACrBhE,EAAMrS,MAAMsF,MAAQvS,KAAKuS,MAAQ,KACjC+M,EAAMrS,MAAMuF,OAASxS,KAAKwS,OAAS,KACnCxS,KAAKgG,MAAMuM,MAAQvS,KAAK20B,KAAKC,SAASntB,KAAK8K,MAC3CvS,KAAKgG,MAAMwM,OAASxS,KAAK20B,KAAKC,SAASntB,KAAK+K,SAG5C8M,EAAMrS,MAAMtF,IAAM,GAClB2X,EAAMrS,MAAMqW,OAAS,IACrBhE,EAAMrS,MAAMxF,KAAO,IACnB6X,EAAMrS,MAAMsF,MAAQvS,KAAKuS,MAAQ,KACjC+M,EAAMrS,MAAMuF,OAASxS,KAAKwS,OAAS,KACnCxS,KAAKgG,MAAMuM,MAAQvS,KAAK20B,KAAKC,SAASvN,MAAM9U,MAC5CvS,KAAKgG,MAAMwM,OAASxS,KAAK20B,KAAKC,SAASvN,MAAM7U,QAG/CiyB,EAAUzkC,KAAK6oC,gBACfpE,EAAUzkC,KAAKwkC,cAAgBC,EAEL,GAAtBzkC,KAAKyO,QAAQu3B,MACfhmC,KAAK6nC,oBAGL7nC,KAAKioC,gBAGPjoC,KAAK8oC,aAAavU,GAEpB,MAAOkQ,IAOT/hC,EAASyQ,UAAU01B,cAAgB,WACjC,GAAIpE,IAAU,CACd7jC,GAAQiQ,gBAAgB7Q,KAAKumC,YAAYC,OACzC5lC,EAAQiQ,gBAAgB7Q,KAAKumC,YAAYE,OAEzC,IAAIlS,GAAcv0B,KAAKyO,QAAqB,YAGxC2sB,EAAcp7B,KAAKgnC,OAAShnC,KAAKgG,MAAMwiC,iBAAmB,GAAKxoC,KAAK6mC,iBAEpE1e,EAAO,GAAIvmB,GACb5B,KAAKy1B,MAAM7lB,MACX5P,KAAKy1B,MAAM5lB,IACXurB,EACAp7B,KAAK+vB,IAAIzQ,MAAMgR,aACftwB,KAAKyO,QAAQ6sB,YAAYt7B,KAAKyO,QAAQ8lB,aACvB,GAAfv0B,KAAKg
nC,QAAmBhnC,KAAKyO,QAAQ8sB,WAGvCv7B,MAAKmoB,KAAOA,CAGZ,IAAIye,IAAc5mC,KAAK+vB,IAAIzQ,MAAMgR,aAAgBnI,EAAKyT,WAAa57B,KAAK+vB,IAAIzQ,MAAMgR,aAAenI,EAAKwU,gBAAoBxU,EAAKwU,YAAcxU,EAAKyT,WAAazT,EAAKA,KAEpKnoB,MAAK4mC,WAAaA,CAElB,IAAImC,GAAgB/oC,KAAKwS,OAASo0B,EAC9BoC,EAAiB,CAGrB,IAAmB,GAAfhpC,KAAKgnC,OAAiB,CACxBJ,EAAa5mC,KAAK6mC,iBAClBmC,EAAiB9jC,KAAKwoB,MAAO1tB,KAAK+vB,IAAIzQ,MAAMgR,aAAesW,EAAcmC,EACzE,KAAK,GAAIvjC,GAAI,EAAO,GAAMwjC,EAAVxjC,EAA0BA,IACxC2iB,EAAK2U,UAIP,IAFAiM,EAAgB/oC,KAAKwS,OAASo0B,EAEL,IAArB5mC,KAAK8mC,cAAiD,GAA3B9mC,KAAKyO,QAAQ8sB,WAAoB,CAC9D,GAAI0N,GAAsB9gB,EAAKwT,UAAYxT,EAAKA,KAAQnoB,KAAK8mC,YAC7D,IAAImC,EAAqB,EACvB,IAAK,GAAIzjC,GAAI,EAAOyjC,EAAJzjC,EAAwBA,IAAM2iB,EAAKE,WAEhD,IAAyB,EAArB4gB,EACP,IAAK,GAAIzjC,GAAI,GAAQyjC,EAALzjC,EAAyBA,IAAM2iB,EAAK2U,gBAKxDiM,IAAiB,GAInB/oC,MAAKkpC,YAAc/gB,EAAKwT,SACxB,IAMIoB,GANAoM,EAAiB,EAGjBz8B,EAAM,CAI8BlG,UAArCxG,KAAKyO,QAAQ6yB,OAAO/M,KACrBwI,EAAW/8B,KAAKyO,QAAQ6yB,OAAO/M,GAAawI,UAG9C/8B,KAAKopC,aAAe,CAEpB,KADA,GAAIp3B,GAAI,EACDtF,EAAMxH,KAAKwoB,MAAMqb,IAAgB,CACtC5gB,EAAKE,OACLrW,EAAI9M,KAAKwoB,MAAMhhB,EAAMk6B,GACrBuC,EAAiBz8B,EAAMk6B,CACvB,IAAI1J,GAAU/U,EAAK+U,WAEfl9B,KAAKyO,QAAyB,iBAAgB,GAAXyuB,GAAmC,GAAfl9B,KAAKgnC,QAAsD,GAAnChnC,KAAKyO,QAAyB,kBAC/GzO,KAAKqpC,aAAar3B,EAAI,EAAGmW,EAAKC,WAAW2U,GAAWxI,EAAa,cAAev0B,KAAKgG,MAAMsiC,iBAGzFpL,GAAWl9B,KAAKyO,QAAyB,iBAAoB,GAAfzO,KAAKgnC,QAChB,GAAnChnC,KAAKyO,QAAyB,iBAA6B,GAAfzO,KAAKgnC,QAA8B,GAAX9J,GAClElrB,GAAK,GACPhS,KAAKqpC,aAAar3B,EAAI,EAAGmW,EAAKC,WAAW2U,GAAWxI,EAAa,cAAev0B,KAAKgG,MAAMwiC,iBAE7FxoC,KAAKspC,YAAYt3B,EAAGuiB,EAAa,wBAAyBv0B,KAAKyO,QAAQw3B,iBAAkBjmC,KAAKgG,MAAM2iC,iBAGpG3oC,KAAKspC,YAAYt3B,EAAGuiB,EAAa,wBAAyBv0B,KAAKyO,QAAQy3B,iBAAkBlmC,KAAKgG,MAAMyiC,gBAGnF,GAAfzoC,KAAKgnC,QAAkC,GAAhB7e,EAAK2R,UAC9B95B,KAAK8mC,aAAep6B,GAGtBA,IAIA1M,KAAK0mC,iBADY,GAAf1mC,KAAKgnC,OACiBh1B,GAAKhS,KAAKkpC,YAAc/gB,EAAK2R,SAG7B95B,KAAK+vB,IAAIzQ,MAAMgR,aAAenI,EAAKwU,WAI7D,IAAI4M,GAAa,CACuB/iC,UAApCxG,KAAKyO,QAAQu2B,MAAMzQ,IAAuE/tB,SAAzCxG,KAAKyO,QAAQu2B,MAAMzQ,GAAahL,OACnFggB,EAAavpC,KAAKgG,MAAMwjC,gBAE1B,IAAI7f,GAA+B,GAAtB3pB,KAAKyO,QAAQu3B,MAAgB9gC,KAAKwH,IAAI1M,KAAKyO,QAAQ43B,UAAWkD,GAAcvpC,KAAKyO,QAAQ03B,aAAe,GAAKoD,EAAavpC,KAAKyO,QAAQ03B,aAAe,EA0BnK,OAvBInmC,MAAKopC,aAAgBppC,KAAKuS,MAAQoX,GAAmC,GAAxB3pB,KAAKyO,QAAQia,SAC5D1oB,KAAKuS,MAAQvS,KAAKopC,aAAezf,EACjC3pB,KAAKyO,QAAQ8D,MAAQvS,KAAKuS,MAAQ,KAClC3R,EAAQsQ,gBAAgBlR,KAAKumC,YAAYC,OACzC5lC,EAAQsQ,gBAAgBlR,KAAKumC,YAAYE,QACzCzmC,KAAKyhB,SACLgjB,GAAU,GAGHzkC,KAAKopC,aAAgBppC,KAAKuS,MAAQoX,GAAmC,GAAxB3pB,KAAKyO,QAAQia,SAAmB1oB,KAAKuS,MAAQvS,KAAK2mC,UACtG3mC,KAAKuS,MAAQrN,KAAKwH,IAAI1M,KAAK2mC,SAAS3mC,KAAKopC,aAAezf,GACxD3pB,KAAKyO,QAAQ8D,MAAQvS,KAAKuS,MAAQ,KAClC3R,EAAQsQ,gBAAgBlR,KAAKumC,YAAYC,OACzC5lC,EAAQsQ,gBAAgBlR,KAAKumC,YAAYE,QACzCzmC,KAAKyhB,SACLgjB,GAAU,IAGV7jC,EAAQsQ,gBAAgBlR,KAAKumC,YAAYC,OACzC5lC,EAAQsQ,gBAAgBlR,KAAKumC,YAAYE,QACzChC,GAAU,GAGLA,GAGT/hC,EAASyQ,UAAUs2B,aAAe,SAAUpiC,GAC1C,GAAIqiC,GAAgB1pC,KAAKkpC,YAAc7hC,EACnCsiC,EAAiBD,EAAgB1pC,KAAK0mC,gBAC1C,OAAOiD,IAYTjnC,EAASyQ,UAAUk2B,aAAe,SAAUr3B,EAAGuX,EAAMgL,EAAa1sB,EAAW+hC,GAE3E,GAAInhB,GAAQ7nB,EAAQ8Q,cAAc,MAAM1R,KAAKumC,YAAYE,OAAQzmC,KAAK+vB,IAAIzQ,MAC1EmJ,GAAM5gB,UAAYA,EAClB4gB,EAAMxE,UAAYsF,EACC,QAAfgL,GACF9L,EAAMxb,MAAMxF,KAAO,IAAMzH,KAAKyO,QAAQ03B,aAAe,KACrD1d,EAAMxb,MAAMqb,UAAY,UAGxBG,EAAMxb,MAAMoa,MAAQ,IAAMrnB,KAAKyO,QAAQ03B,aAAe,KACtD1d,EAAMxb,MAAMqb,UAAY,QAG1BG,EAAMxb,MAAMtF,IAAMqK,EAAI,GAAM43B,EAAkB5pC,KAAKyO,QAAQ23B,aAAe,KAE1E7c,GAAQ,EAER,IAAIsgB,GAAe3kC,KAAKwH,IAAI1M,KAAKgG,MAAM8jC,eAAe9pC,KAAKgG,MAAM+jC,eAC7D/pC,MAAKopC,aAAe7f,EAAK5jB,OAASkkC,IACpC7pC,KAAKopC,aAAe7
f,EAAK5jB,OAASkkC,IAYtCnnC,EAASyQ,UAAUm2B,YAAc,SAAUt3B,EAAGuiB,EAAa1sB,EAAW8hB,EAAQpX,GAC5E,GAAmB,GAAfvS,KAAKgnC,OAAgB,CACvB,GAAInX,GAAOjvB,EAAQ8Q,cAAc,MAAM1R,KAAKumC,YAAYC,MAAOxmC,KAAK+vB,IAAIqX,cACxEvX,GAAKhoB,UAAYA,EACjBgoB,EAAK5L,UAAY,GAEE,QAAfsQ,EACF1E,EAAK5iB,MAAMxF,KAAQzH,KAAKuS,MAAQoX,EAAU,KAG1CkG,EAAK5iB,MAAMoa,MAASrnB,KAAKuS,MAAQoX,EAAU,KAG7CkG,EAAK5iB,MAAMsF,MAAQA,EAAQ,KAC3Bsd,EAAK5iB,MAAMtF,IAAMqK,EAAI,OASzBtP,EAASyQ,UAAU21B,aAAe,SAAUvU,GAI1C,GAHA3zB,EAAQiQ,gBAAgB7Q,KAAKumC,YAAYvB,OAGDx+B,SAApCxG,KAAKyO,QAAQu2B,MAAMzQ,IAAuE/tB,SAAzCxG,KAAKyO,QAAQu2B,MAAMzQ,GAAahL,KAAoB,CACvG,GAAIyb,GAAQpkC,EAAQ8Q,cAAc,MAAO1R,KAAKumC,YAAYvB,MAAOhlC,KAAK+vB,IAAIzQ,MAC1E0lB,GAAMn9B,UAAY,eAAiB0sB,EACnCyQ,EAAM/gB,UAAYjkB,KAAKyO,QAAQu2B,MAAMzQ,GAAahL,KAGJ/iB,SAA1CxG,KAAKyO,QAAQu2B,MAAMzQ,GAAatnB,OAClCtM,EAAK2M,WAAW03B,EAAOhlC,KAAKyO,QAAQu2B,MAAMzQ,GAAatnB,OAGtC,QAAfsnB,EACFyQ,EAAM/3B,MAAMxF,KAAOzH,KAAKgG,MAAMwjC,gBAAkB,KAGhDxE,EAAM/3B,MAAMoa,MAAQrnB,KAAKgG,MAAMwjC,gBAAkB,KAGnDxE,EAAM/3B,MAAMsF,MAAQvS,KAAKwS,OAAS,KAIpC5R,EAAQsQ,gBAAgBlR,KAAKumC,YAAYvB,QAW3CtiC,EAASyQ,UAAUi1B,mBAAqB,WAEtC,KAAM,mBAAqBpoC,MAAKgG,OAAQ,CACtC,GAAIgkC,GAAYz4B,SAAS04B,eAAe,KACpCC,EAAmB34B,SAASM,cAAc,MAC9Cq4B,GAAiBriC,UAAY,sBAC7BqiC,EAAiBz4B,YAAYu4B,GAC7BhqC,KAAK+vB,IAAIzQ,MAAM7N,YAAYy4B,GAE3BlqC,KAAKgG,MAAMsiC,gBAAkB4B,EAAiBrlB,aAC9C7kB,KAAKgG,MAAM+jC,eAAiBG,EAAiB1qB,YAE7Cxf,KAAK+vB,IAAIzQ,MAAMnO,YAAY+4B,GAG7B,KAAM,mBAAqBlqC,MAAKgG,OAAQ,CACtC,GAAImkC,GAAY54B,SAAS04B,eAAe,KACpCG,EAAmB74B,SAASM,cAAc,MAC9Cu4B,GAAiBviC,UAAY,sBAC7BuiC,EAAiB34B,YAAY04B,GAC7BnqC,KAAK+vB,IAAIzQ,MAAM7N,YAAY24B,GAE3BpqC,KAAKgG,MAAMwiC,gBAAkB4B,EAAiBvlB,aAC9C7kB,KAAKgG,MAAM8jC,eAAiBM,EAAiB5qB,YAE7Cxf,KAAK+vB,IAAIzQ,MAAMnO,YAAYi5B,GAG7B,KAAM,mBAAqBpqC,MAAKgG,OAAQ,CACtC,GAAIqkC,GAAY94B,SAAS04B,eAAe,KACpCK,EAAmB/4B,SAASM,cAAc,MAC9Cy4B,GAAiBziC,UAAY,sBAC7ByiC,EAAiB74B,YAAY44B,GAC7BrqC,KAAK+vB,IAAIzQ,MAAM7N,YAAY64B,GAE3BtqC,KAAKgG,MAAMwjC,gBAAkBc,EAAiBzlB,aAC9C7kB,KAAKgG,MAAMukC,eAAiBD,EAAiB9qB,YAE7Cxf,KAAK+vB,IAAIzQ,MAAMnO,YAAYm5B,KAU/B5nC,EAASyQ,UAAU6hB,KAAO,SAASyD,GACjC,MAAOz4B,MAAKmoB,KAAK6M,KAAKyD,IAGxB54B,EAAOD,QAAU8C,GAKb,SAAS7C,EAAQD,EAASM,GAkB9B,QAASyC,GAAYsP,EAAOqlB,EAAS7oB,EAAS+7B,GAC5CxqC,KAAKK,GAAKi3B,CACV,IAAIppB,IAAU,WAAW,QAAQ,OAAO,mBAAmB,WAAW,aAAa,SAAS,aAC5FlO,MAAKyO,QAAU9N,EAAKsN,sBAAsBC,EAAOO,GACjDzO,KAAKyqC,kBAAwCjkC,SAApByL,EAAMpK,UAC/B7H,KAAKwqC,yBAA2BA,EAChCxqC,KAAK0qC,aAAe,EACpB1qC,KAAK6U,OAAO5C,GACkB,GAA1BjS,KAAKyqC,oBACPzqC,KAAKwqC,yBAAyB,IAAM,GAEtCxqC,KAAK81B,aACL91B,KAAK0oB,QAA4BliB,SAAlByL,EAAMyW,SAAwB,EAAOzW,EAAMyW,QA5B5D,GAAI/nB,GAAOT,EAAoB,GAC3BU,EAAUV,EAAoB,GAC9ByqC,EAAOzqC,EAAoB,IAC3B0qC,EAAM1qC,EAAoB,IAC1B2qC,EAAS3qC,EAAoB,GAgCjCyC,GAAWwQ,UAAU8iB,SAAW,SAASh0B,GAC1B,MAATA,GACFjC,KAAK81B,UAAY7zB,EACQ,GAArBjC,KAAKyO,QAAQyH,MACflW,KAAK81B,UAAU5f,KAAK,SAAU3Q,EAAEa,GAAI,MAAOb,GAAEwM,EAAI3L,EAAE2L,KAIrD/R,KAAK81B,cASTnzB,EAAWwQ,UAAU23B,gBAAkB,SAASvlB,GAC9CvlB,KAAK0qC,aAAenlB,GAQtB5iB,EAAWwQ,UAAUD,WAAa,SAASzE,GACzC,GAAgBjI,SAAZiI,EAAuB,CACzB,GAAIP,IAAU,WAAW,QAAQ,OAAO,mBAAmB,WAC3DvN,GAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASA,GAE/C9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,cACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,cACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,UAEpCA,EAAQs8B,YACuB,gBAAtBt8B,GAAQs8B,YACbt8B,EAAQs8B,WAAWC,kBACqB,WAAtCv8B,EAAQs8B,WAAWC,gBACrBhrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,EAEa,WAAtCx8B,EAAQs8B,WAAWC,gBAC1BhrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,GAGhCjrC,KAAKyO,QAAQs8B,WAAWC,gBAAkB,cAC1ChrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,KAOhB,QAAtBjrC,KAAKyO,QAAQxB,MACfjN,KAAK8G,KAAO,GAAI6jC,GAAK3qC,KAAKK,GAAIL,KAAKyO,SAEN,OAAtBzO,KAAKyO,QAAQxB,MACpBjN,KAAK8G,KAAO,GAAI8jC,GAAI5
qC,KAAKK,GAAIL,KAAKyO,SAEL,UAAtBzO,KAAKyO,QAAQxB,QACpBjN,KAAK8G,KAAO,GAAI+jC,GAAO7qC,KAAKK,GAAIL,KAAKyO,WASzC9L,EAAWwQ,UAAU0B,OAAS,SAAS5C,GACrCjS,KAAKiS,MAAQA,EACbjS,KAAK4vB,QAAU3d,EAAM2d,SAAW,QAChC5vB,KAAK6H,UAAYoK,EAAMpK,WAAa7H,KAAK6H,WAAa,aAAe7H,KAAKwqC,yBAAyB,GAAK,GACxGxqC,KAAK0oB,QAA4BliB,SAAlByL,EAAMyW,SAAwB,EAAOzW,EAAMyW,QAC1D1oB,KAAKiN,MAAQgF,EAAMhF,MACnBjN,KAAKkT,WAAWjB,EAAMxD,UAcxB9L,EAAWwQ,UAAU60B,SAAW,SAASj2B,EAAGC,EAAGlB,EAAeo6B,EAAc7E,EAAWyB,GACrF,GACIqD,GAAMC,EADNC,EAA0B,GAAbvD,EAGbwD,EAAU1qC,EAAQwQ,cAAc,OAAQN,EAAeo6B,EAO3D,IANAI,EAAQl5B,eAAe,KAAM,IAAKL,GAClCu5B,EAAQl5B,eAAe,KAAM,IAAKJ,EAAIq5B,GACtCC,EAAQl5B,eAAe,KAAM,QAASi0B,GACtCiF,EAAQl5B,eAAe,KAAM,SAAU,EAAEi5B,GACzCC,EAAQl5B,eAAe,KAAM,QAAS,WAEZ,QAAtBpS,KAAKyO,QAAQxB,MACfk+B,EAAOvqC,EAAQwQ,cAAc,OAAQN,EAAeo6B,GACpDC,EAAK/4B,eAAe,KAAM,QAASpS,KAAK6H,WACtBrB,SAAfxG,KAAKiN,OACNk+B,EAAK/4B,eAAe,KAAM,QAASpS,KAAKiN,OAG1Ck+B,EAAK/4B,eAAe,KAAM,IAAK,IAAML,EAAI,IAAIC,EAAE,MAAQD,EAAIs0B,GAAa,IAAIr0B,GACzC,GAA/BhS,KAAKyO,QAAQ88B,OAAO78B,UACtB08B,EAAWxqC,EAAQwQ,cAAc,OAAQN,EAAeo6B,GACjB,OAAnClrC,KAAKyO,QAAQ88B,OAAOhX,YACtB6W,EAASh5B,eAAe,KAAM,IAAK,IAAIL,EAAE,MAAQC,EAAIq5B,GACnD,IAAIt5B,EAAE,IAAIC,EAAE,MAAOD,EAAIs0B,GAAa,IAAIr0B,EAAE,MAAOD,EAAIs0B,GAAa,KAAOr0B,EAAIq5B,IAG/ED,EAASh5B,eAAe,KAAM,IAAK,IAAIL,EAAE,IAAIC,EAAE,KACzCD,EAAE,KAAOC,EAAIq5B,GAAc,MACzBt5B,EAAIs0B,GAAa,KAAOr0B,EAAIq5B,GAClC,KAAMt5B,EAAIs0B,GAAa,IAAIr0B,GAE/Bo5B,EAASh5B,eAAe,KAAM,QAASpS,KAAK6H,UAAY,cAGnB,GAAnC7H,KAAKyO,QAAQ0D,WAAWzD,SAC1B9N,EAAQkR,UAAUC,EAAI,GAAMs0B,EAAUr0B,EAAGhS,KAAM8Q,EAAeo6B,OAG7D,CACH,GAAIM,GAAWtmC,KAAKwoB,MAAM,GAAM2Y,GAC5BoF,EAAavmC,KAAKwoB,MAAM,GAAMoa,GAC9B4D,EAAaxmC,KAAKwoB,MAAM,IAAOoa,GAE/Bne,EAASzkB,KAAKwoB,OAAO2Y,EAAa,EAAImF,GAAW,EAErD5qC,GAAQ0R,QAAQP,EAAI,GAAIy5B,EAAW7hB,EAAY3X,EAAIq5B,EAAaI,EAAa,EAAGD,EAAUC,EAAYzrC,KAAK6H,UAAY,OAAQiJ,EAAeo6B,GAC9ItqC,EAAQ0R,QAAQP,EAAI,IAAIy5B,EAAW7hB,EAAS,EAAG3X,EAAIq5B,EAAaK,EAAa,EAAGF,EAAUE,EAAY1rC,KAAK6H,UAAY,OAAQiJ,EAAeo6B,KAYlJvoC,EAAWwQ,UAAUkkB,UAAY,SAASgP,EAAWyB,GACnD,GAAIlC,GAAMr0B,SAASC,gBAAgB,6BAA6B,MAEhE,OADAxR,MAAKgoC,SAAS,EAAE,GAAIF,KAAclC,EAAIS,EAAUyB,IACxC6D,KAAM/F,EAAKnd,MAAOzoB,KAAK4vB,QAAS2E,YAAYv0B,KAAKyO,QAAQm9B,mBAGnEjpC,EAAWwQ,UAAU04B,UAAY,SAASC,GACxC,MAAO9rC,MAAK8G,KAAK+kC,UAAUC,IAG7BnpC,EAAWwQ,UAAU44B,KAAO,SAAS/U,EAAS/kB,EAAO+5B,GACnDhsC,KAAK8G,KAAKilC,KAAK/U,EAAS/kB,EAAO+5B,IAIjCnsC,EAAOD,QAAU+C,GAKb,SAAS9C,EAAQD,EAASM,GAY9B,QAAS0C,GAAO00B,EAAS5kB,EAAMmjB,GAC7B71B,KAAKs3B,QAAUA,EACft3B,KAAKmhC,aACLnhC,KAAKisC,cAAgB,EACrBjsC,KAAKksC,gBAAkBx5B,GAAQA,EAAKy5B,cACpCnsC,KAAK61B,QAAUA,EAEf71B,KAAK+vB,OACL/vB,KAAKgG,OACHyiB,OACElW,MAAO,EACPC,OAAQ,IAGZxS,KAAK6H,UAAY,KAEjB7H,KAAKiC,SACLjC,KAAKosC,gBACLpsC,KAAK4O,cACHy9B,WACAC,UAEFtsC,KAAKusC,kBAAmB,CACxB,IAAIp4B,GAAKnU,IACTA,MAAK61B,QAAQlB,KAAKE,QAAQthB,GAAG,mBAAoB,WAC/CY,EAAGo4B,kBAAmB,IAGxBvsC,KAAK00B,UAEL10B,KAAKgY,QAAQtF,GAxCf,CAAA,GAAI/R,GAAOT,EAAoB,GAC3B4B,EAAQ5B,EAAoB,GAChBA,GAAoB,IA6CpC0C,EAAMuQ,UAAUuhB,QAAU,WACxB,GAAIjM,GAAQlX,SAASM,cAAc,MACnC4W,GAAM5gB,UAAY,SAClB7H,KAAK+vB,IAAItH,MAAQA,CAEjB,IAAI+jB,GAAQj7B,SAASM,cAAc,MACnC26B,GAAM3kC,UAAY,QAClB4gB,EAAMhX,YAAY+6B,GAClBxsC,KAAK+vB,IAAIyc,MAAQA,CAEjB,IAAIC,GAAal7B,SAASM,cAAc,MACxC46B,GAAW5kC,UAAY,QACvB4kC,EAAW,kBAAoBzsC,KAC/BA,KAAK+vB,IAAI0c,WAAaA,EAEtBzsC,KAAK+vB,IAAI5jB,WAAaoF,SAASM,cAAc,OAC7C7R,KAAK+vB,IAAI5jB,WAAWtE,UAAY,QAEhC7H,KAAK+vB,IAAI+Q,KAAOvvB,SAASM,cAAc,OACvC7R,KAAK+vB,IAAI+Q,KAAKj5B,UAAY,QAK1B7H,KAAK+vB,IAAI2c,OAASn7B,SAASM,cAAc,OACzC7R,KAAK+vB,IAAI2c,OAAOz/B,MAAMuqB,WAAa,SACnCx3B,KAAK+vB,IAAI2c,OAAOzoB,UAAY,IAC5BjkB,KAAK+vB,IAAI5jB,WAAWsF,YAAYzR,KAAK+vB,IAAI2c,SAO3C9pC,EA
AMuQ,UAAU6E,QAAU,SAAStF,GAEjC,GAAIkd,GAAUld,GAAQA,EAAKkd,OACvBA,aAAmB+c,SACrB3sC,KAAK+vB,IAAIyc,MAAM/6B,YAAYme,GAG3B5vB,KAAK+vB,IAAIyc,MAAMvoB,UADIzd,SAAZopB,GAAqC,OAAZA,EACLA,EAGA5vB,KAAKs3B,SAAW,GAI7Ct3B,KAAK+vB,IAAItH,MAAMuc,MAAQtyB,GAAQA,EAAKsyB,OAAS,GAExChlC,KAAK+vB,IAAIyc,MAAM7oB,WAIlBhjB,EAAKuH,gBAAgBlI,KAAK+vB,IAAIyc,MAAO,UAHrC7rC,EAAKiH,aAAa5H,KAAK+vB,IAAIyc,MAAO,SAOpC,IAAI3kC,GAAY6K,GAAQA,EAAK7K,WAAa,IACtCA,IAAa7H,KAAK6H,YAChB7H,KAAK6H,YACPlH,EAAKuH,gBAAgBlI,KAAK+vB,IAAItH,MAAOzoB,KAAK6H,WAC1ClH,EAAKuH,gBAAgBlI,KAAK+vB,IAAI0c,WAAYzsC,KAAK6H,WAC/ClH,EAAKuH,gBAAgBlI,KAAK+vB,IAAI5jB,WAAYnM,KAAK6H,WAC/ClH,EAAKuH,gBAAgBlI,KAAK+vB,IAAI+Q,KAAM9gC,KAAK6H,YAE3ClH,EAAKiH,aAAa5H,KAAK+vB,IAAItH,MAAO5gB,GAClClH,EAAKiH,aAAa5H,KAAK+vB,IAAI0c,WAAY5kC,GACvClH,EAAKiH,aAAa5H,KAAK+vB,IAAI5jB,WAAYtE,GACvClH,EAAKiH,aAAa5H,KAAK+vB,IAAI+Q,KAAMj5B,GACjC7H,KAAK6H,UAAYA,GAIf7H,KAAKiN,QACPtM,EAAK8M,cAAczN,KAAK+vB,IAAItH,MAAOzoB,KAAKiN,OACxCjN,KAAKiN,MAAQ,MAEXyF,GAAQA,EAAKzF,QACftM,EAAK2M,WAAWtN,KAAK+vB,IAAItH,MAAO/V,EAAKzF,OACrCjN,KAAKiN,MAAQyF,EAAKzF,QAQtBrK,EAAMuQ,UAAUy5B,cAAgB,WAC9B,MAAO5sC,MAAKgG,MAAMyiB,MAAMlW,OAW1B3P,EAAMuQ,UAAUsO,OAAS,SAASgU,EAAO/b,EAAQmzB,GAC/C,GAAIpI,IAAU,CAEdzkC,MAAKosC,aAAepsC,KAAK8sC,oBAAoB9sC,KAAK4O,aAAc5O,KAAKosC,aAAc3W,EAInF,IAAIsX,GAAe/sC,KAAK+vB,IAAI2c,OAAO7nB,YAC/BkoB,IAAgB/sC,KAAKgtC,mBACvBhtC,KAAKgtC,iBAAmBD,EAExBpsC,EAAK0H,QAAQrI,KAAKiC,MAAO,SAAUoN,GACjCA,EAAK49B,OAAQ,EACT59B,EAAK69B,WAAW79B,EAAKoS,WAG3BorB,GAAU,GAIR7sC,KAAK61B,QAAQpnB,QAAQ3M,MACvBA,EAAMA,MAAM9B,KAAKosC,aAAc1yB,EAAQmzB,GAGvC/qC,EAAMo/B,QAAQlhC,KAAKosC,aAAc1yB,EAAQ1Z,KAAKmhC,UAIhD,IAAI3uB,GAASxS,KAAKmtC,iBAAiBzzB,GAG/B+yB,EAAazsC,KAAK+vB,IAAI0c,UAC1BzsC,MAAK2H,IAAM8kC,EAAWW,UACtBptC,KAAKyH,KAAOglC,EAAWY,WACvBrtC,KAAKuS,MAAQk6B,EAAWrc,YACxBqU,EAAU9jC,EAAK8H,eAAezI,KAAM,SAAUwS,IAAWiyB,EAGzDA,EAAU9jC,EAAK8H,eAAezI,KAAKgG,MAAMyiB,MAAO,QAASzoB,KAAK+vB,IAAIyc,MAAMhtB,cAAgBilB,EACxFA,EAAU9jC,EAAK8H,eAAezI,KAAKgG,MAAMyiB,MAAO,SAAUzoB,KAAK+vB,IAAIyc,MAAM3nB,eAAiB4f,EAG1FzkC,KAAK+vB,IAAI5jB,WAAWc,MAAMuF,OAAUA,EAAS,KAC7CxS,KAAK+vB,IAAI0c,WAAWx/B,MAAMuF,OAAUA,EAAS,KAC7CxS,KAAK+vB,IAAItH,MAAMxb,MAAMuF,OAASA,EAAS,IAGvC,KAAK,GAAIhN,GAAI,EAAG8nC,EAAKttC,KAAKosC,aAAazmC,OAAY2nC,EAAJ9nC,EAAQA,IAAK,CAC1D,GAAI6J,GAAOrP,KAAKosC,aAAa5mC,EAC7B6J,GAAKk+B,YAAY7zB,GAGnB,MAAO+qB,IAST7hC,EAAMuQ,UAAUg6B,iBAAmB,SAAUzzB,GAE3C,GAAIlH,GACA45B,EAAepsC,KAAKosC,YAGxBpsC,MAAKwtC,gBACL,IAAIr5B,GAAKnU,IACT,IAAIosC,EAAazmC,OAAQ,CACvB,GAAImG,GAAMsgC,EAAa,GAAGzkC,IACtB+E,EAAM0/B,EAAa,GAAGzkC,IAAMykC,EAAa,GAAG55B,MAahD,IAZA7R,EAAK0H,QAAQ+jC,EAAc,SAAU/8B,GACnCvD,EAAM5G,KAAK4G,IAAIA,EAAKuD,EAAK1H,KACzB+E,EAAMxH,KAAKwH,IAAIA,EAAM2C,EAAK1H,IAAM0H,EAAKmD,QACVhM,SAAvB6I,EAAKqD,KAAK2uB,WACZltB,EAAGgtB,UAAU9xB,EAAKqD,KAAK2uB,UAAU7uB,OAAStN,KAAKwH,IAAIyH,EAAGgtB,UAAU9xB,EAAKqD,KAAK2uB,UAAU7uB,OAAOnD,EAAKmD,QAChG2B,EAAGgtB,UAAU9xB,EAAKqD,KAAK2uB,UAAU3Y,SAAU,KAO3C5c,EAAM4N,EAAOonB,KAAM,CAErB,GAAInX,GAAS7d,EAAM4N,EAAOonB,IAC1Bp0B,IAAOid,EACPhpB,EAAK0H,QAAQ+jC,EAAc,SAAU/8B,GACnCA,EAAK1H,KAAOgiB,IAGhBnX,EAAS9F,EAAMgN,EAAOrK,KAAKoW,SAAW,MAGtCjT,GAASkH,EAAOonB,KAAOpnB,EAAOrK,KAAKoW,QAIrC,OAFAjT,GAAStN,KAAKwH,IAAI8F,EAAQxS,KAAKgG,MAAMyiB,MAAMjW,SAQ7C5P,EAAMuQ,UAAUw0B,KAAO,WAChB3nC,KAAK+vB,IAAItH,MAAM5e,YAClB7J,KAAK61B,QAAQ9F,IAAI0d,SAASh8B,YAAYzR,KAAK+vB,IAAItH,OAG5CzoB,KAAK+vB,IAAI0c,WAAW5iC,YACvB7J,KAAK61B,QAAQ9F,IAAI0c,WAAWh7B,YAAYzR,KAAK+vB,IAAI0c,YAG9CzsC,KAAK+vB,IAAI5jB,WAAWtC,YACvB7J,KAAK61B,QAAQ9F,IAAI5jB,WAAWsF,YAAYzR,KAAK+vB,IAAI5jB,YAG9CnM,KAAK+vB,IAAI+Q,KAAKj3B,YACjB7J,KAAK61B,QAAQ9F,IAAI+Q,KAAKrvB,YAAYzR,KAAK+vB,IAAI+Q,OAO/Cl+B,EAAMuQ,UAAUu0B,KAAO,WAC
rB,GAAIjf,GAAQzoB,KAAK+vB,IAAItH,KACjBA,GAAM5e,YACR4e,EAAM5e,WAAWsH,YAAYsX,EAG/B,IAAIgkB,GAAazsC,KAAK+vB,IAAI0c,UACtBA,GAAW5iC,YACb4iC,EAAW5iC,WAAWsH,YAAYs7B,EAGpC,IAAItgC,GAAanM,KAAK+vB,IAAI5jB,UACtBA,GAAWtC,YACbsC,EAAWtC,WAAWsH,YAAYhF,EAGpC,IAAI20B,GAAO9gC,KAAK+vB,IAAI+Q,IAChBA,GAAKj3B,YACPi3B,EAAKj3B,WAAWsH,YAAY2vB,IAQhCl+B,EAAMuQ,UAAUF,IAAM,SAAS5D,GAc7B,GAbArP,KAAKiC,MAAMoN,EAAKhP,IAAMgP,EACtBA,EAAKq+B,UAAU1tC,MAGYwG,SAAvB6I,EAAKqD,KAAK2uB,WAC+B76B,SAAvCxG,KAAKmhC,UAAU9xB,EAAKqD,KAAK2uB,YAC3BrhC,KAAKmhC,UAAU9xB,EAAKqD,KAAK2uB,WAAa7uB,OAAO,EAAGkW,SAAS,EAAOvgB,MAAMnI,KAAKisC,cAAehqC,UAC1FjC,KAAKisC,iBAEPjsC,KAAKmhC,UAAU9xB,EAAKqD,KAAK2uB,UAAUp/B,MAAM+F,KAAKqH,IAEhDrP,KAAK2tC,iBAEkC,IAAnC3tC,KAAKosC,aAAazlC,QAAQ0I,GAAa,CACzC,GAAIomB,GAAQz1B,KAAK61B,QAAQlB,KAAKc,KAC9Bz1B,MAAK4tC,gBAAgBv+B,EAAMrP,KAAKosC,aAAc3W,KAIlD7yB,EAAMuQ,UAAUw6B,eAAiB,WAC/B,GAA6BnnC,SAAzBxG,KAAKksC,gBAA+B,CACtC,GAAI2B,KACJ,IAAmC,gBAAxB7tC,MAAKksC,gBAA6B,CAC3C,IAAK,GAAI7K,KAAYrhC,MAAKmhC,UACxB0M,EAAU7lC,MAAMq5B,SAAUA,EAAUyM,UAAW9tC,KAAKmhC,UAAUE,GAAUp/B,MAAM,GAAGyQ,KAAK1S,KAAKksC,kBAE7F2B,GAAU33B,KAAK,SAAU3Q,EAAGa,GAC1B,MAAOb,GAAEuoC,UAAY1nC,EAAE0nC,gBAGtB,IAAmC,kBAAxB9tC,MAAKksC,gBAA+B,CAClD,IAAK,GAAI7K,KAAYrhC,MAAKmhC,UACxB0M,EAAU7lC,KAAKhI,KAAKmhC,UAAUE,GAAUp/B,MAAM,GAAGyQ,KAEnDm7B,GAAU33B,KAAKlW,KAAKksC,iBAGtB,GAAI2B,EAAUloC,OAAS,EACrB,IAAK,GAAIH,GAAI,EAAGA,EAAIqoC,EAAUloC,OAAQH,IACpCxF,KAAKmhC,UAAU0M,EAAUroC,GAAG67B,UAAUl5B,MAAQ3C,IAMtD5C,EAAMuQ,UAAUq6B,eAAiB,WAC/B,IAAK,GAAInM,KAAYrhC,MAAKmhC,UACpBnhC,KAAKmhC,UAAUr7B,eAAeu7B,KAChCrhC,KAAKmhC,UAAUE,GAAU3Y,SAAU,IASzC9lB,EAAMuQ,UAAUkD,OAAS,SAAShH,SACzBrP,MAAKiC,MAAMoN,EAAKhP,IACvBgP,EAAKq+B,UAAU,KAGf,IAAIvlC,GAAQnI,KAAKosC,aAAazlC,QAAQ0I,EACzB,KAATlH,GAAanI,KAAKosC,aAAahkC,OAAOD,EAAO,IAUnDvF,EAAMuQ,UAAU46B,kBAAoB,SAAS1+B,GAC3CrP,KAAK61B,QAAQmY,WAAW3+B,EAAKhP,KAO/BuC,EAAMuQ,UAAUsC,MAAQ,WAKtB,IAAK,GAJDjN,GAAQ7H,EAAK4H,QAAQvI,KAAKiC,OAC1BgsC,KACAC,KAEK1oC,EAAI,EAAGA,EAAIgD,EAAM7C,OAAQH,IACNgB,SAAtBgC,EAAMhD,GAAGkN,KAAK7C,KAChBq+B,EAASlmC,KAAKQ,EAAMhD,IAEtByoC,EAAWjmC,KAAKQ,EAAMhD,GAExBxF,MAAK4O,cACHy9B,QAAS4B,EACT3B,MAAO4B,GAGTpsC,EAAM0+B,aAAaxgC,KAAK4O,aAAay9B,SACrCvqC,EAAM2+B,WAAWzgC,KAAK4O,aAAa09B,QAYrC1pC,EAAMuQ,UAAU25B,oBAAsB,SAASl+B,EAAcu/B,EAAiB1Y,GAC5E,GAKIpmB,GAAM7J,EALN4mC,KACAgC,KACA5b,GAAYiD,EAAM5lB,IAAM4lB,EAAM7lB,OAAS,EACvCy+B,EAAa5Y,EAAM7lB,MAAQ4iB,EAC3B8b,EAAa7Y,EAAM5lB,IAAM2iB,EAIzB3jB,EAAiB,SAAUxH,GAC7B,MAAiBgnC,GAARhnC,EAA6B,GACpBinC,GAATjnC,EAA8B,EACA,EAMzC,IAAI8mC,EAAgBxoC,OAAS,EAC3B,IAAKH,EAAI,EAAGA,EAAI2oC,EAAgBxoC,OAAQH,IACtCxF,KAAKuuC,6BAA6BJ,EAAgB3oC,GAAI4mC,EAAcgC,EAAoB3Y,EAK5F,IAAI+Y,GAAoB7tC,EAAKgO,mBAAmBC,EAAay9B,QAASx9B,EAAgB,OAAO,QAS7F,IANA7O,KAAKyuC,cAAcD,EAAmB5/B,EAAay9B,QAASD,EAAcgC,EAAoB,SAAU/+B,GACtG,MAAQA,GAAKqD,KAAK9C,MAAQy+B,GAAch/B,EAAKqD,KAAK9C,MAAQ0+B,IAK/B,GAAzBtuC,KAAKusC,iBAEP,IADAvsC,KAAKusC,kBAAmB,EACnB/mC,EAAI,EAAGA,EAAIoJ,EAAa09B,MAAM3mC,OAAQH,IACzCxF,KAAKuuC,6BAA6B3/B,EAAa09B,MAAM9mC,GAAI4mC,EAAcgC,EAAoB3Y,OAG1F,CAEH,GAAIiZ,GAAkB/tC,EAAKgO,mBAAmBC,EAAa09B,MAAOz9B,EAAgB,OAAO,MAGzF7O,MAAKyuC,cAAcC,EAAiB9/B,EAAa09B,MAAOF,EAAcgC,EAAoB,SAAU/+B,GAClG,MAAQA,GAAKqD,KAAK7C,IAAMw+B,GAAch/B,EAAKqD,KAAK7C,IAAMy+B,IAM1D,IAAK9oC,EAAI,EAAGA,EAAI4mC,EAAazmC,OAAQH,IACnC6J,EAAO+8B,EAAa5mC,GACf6J,EAAK69B,WAAW79B,EAAKs4B,OAE1Bt4B,EAAKs/B,aAgBP,OAAOvC,IAGTxpC,EAAMuQ,UAAUs7B,cAAgB,SAAUG,EAAY3sC,EAAOmqC,EAAcgC,EAAoBS,GAC7F,GAAIx/B,GACA7J,CAEJ,IAAkB,IAAdopC,EAAkB,CACpB,IAAKppC,EAAIopC,EAAYppC,GAAK,IACxB6J,EAAOpN,EAAMuD,IACTqpC,EAAex/B,IAFQ7J,IAMWgB,SAAhC4nC,EAAmB/+B,EAAKhP,MAC1B+tC,EAAmB/+B,EAAKhP,KAAM,EAC9B+rC,EAAapkC,KAAKqH,GAKxB,KAAK7J,EAAIo
pC,EAAa,EAAGppC,EAAIvD,EAAM0D,SACjC0J,EAAOpN,EAAMuD,IACTqpC,EAAex/B,IAFsB7J,IAMHgB,SAAhC4nC,EAAmB/+B,EAAKhP,MAC1B+tC,EAAmB/+B,EAAKhP,KAAM,EAC9B+rC,EAAapkC,KAAKqH,MAmB5BzM,EAAMuQ,UAAUy6B,gBAAkB,SAASv+B,EAAM+8B,EAAc3W,GACvDpmB,EAAKy/B,UAAUrZ,IACZpmB,EAAK69B,WAAW79B,EAAKs4B,OAE1Bt4B,EAAKs/B,cACLvC,EAAapkC,KAAKqH,IAGdA,EAAK69B,WAAW79B,EAAKq4B,QAgB/B9kC,EAAMuQ,UAAUo7B,6BAA+B,SAASl/B,EAAM+8B,EAAcgC,EAAoB3Y,GAC1FpmB,EAAKy/B,UAAUrZ,GACmBjvB,SAAhC4nC,EAAmB/+B,EAAKhP,MAC1B+tC,EAAmB/+B,EAAKhP,KAAM,EAC9B+rC,EAAapkC,KAAKqH,IAIhBA,EAAK69B,WAAW79B,EAAKq4B;EAM7B7nC,EAAOD,QAAUgD,GAKb,SAAS/C,EAAQD,EAASM,GAW9B,QAAS2C,GAAiBy0B,EAAS5kB,EAAMmjB,GACvCjzB,EAAMrC,KAAKP,KAAMs3B,EAAS5kB,EAAMmjB,GAEhC71B,KAAKuS,MAAQ,EACbvS,KAAKwS,OAAS,EACdxS,KAAK2H,IAAM,EACX3H,KAAKyH,KAAO,EAfd,GACI7E,IADO1C,EAAoB,GACnBA,EAAoB,IAiBhC2C,GAAgBsQ,UAAY5M,OAAO8H,OAAOzL,EAAMuQ,WAShDtQ,EAAgBsQ,UAAUsO,OAAS,SAASgU,EAAO/b,GACjD,GAAI+qB,IAAU,CAEdzkC,MAAKosC,aAAepsC,KAAK8sC,oBAAoB9sC,KAAK4O,aAAc5O,KAAKosC,aAAc3W,GAGnFz1B,KAAKuS,MAAQvS,KAAK+vB,IAAI5jB,WAAWikB,YAGjCpwB,KAAK+vB,IAAI5jB,WAAWc,MAAMuF,OAAU,GAGpC,KAAK,GAAIhN,GAAI,EAAG8nC,EAAKttC,KAAKosC,aAAazmC,OAAY2nC,EAAJ9nC,EAAQA,IAAK,CAC1D,GAAI6J,GAAOrP,KAAKosC,aAAa5mC,EAC7B6J,GAAKk+B,YAAY7zB,GAGnB,MAAO+qB,IAMT5hC,EAAgBsQ,UAAUw0B,KAAO,WAC1B3nC,KAAK+vB,IAAI5jB,WAAWtC,YACvB7J,KAAK61B,QAAQ9F,IAAI5jB,WAAWsF,YAAYzR,KAAK+vB,IAAI5jB,aAIrDtM,EAAOD,QAAUiD,GAKb,SAAShD,EAAQD,EAASM,GA2B9B,QAAS4C,GAAQ6xB,EAAMlmB,GACrBzO,KAAK20B,KAAOA,EAEZ30B,KAAKq0B,gBACHvtB,KAAM,KACNytB,YAAa,SACbwa,MAAO,OACPjtC,OAAO,EACPktC,WAAY,KAEZC,YAAY,EACZC,UACEC,YAAY,EACZ3H,aAAa,EACbv0B,KAAK,EACLoD,QAAQ,GAGV+4B,MAAO,SAAU//B,EAAM/G,GACrBA,EAAS+G,IAEXggC,SAAU,SAAUhgC,EAAM/G,GACxBA,EAAS+G,IAEXigC,OAAQ,SAAUjgC,EAAM/G,GACtBA,EAAS+G,IAEXkgC,SAAU,SAAUlgC,EAAM/G,GACxBA,EAAS+G,IAEXmgC,SAAU,SAAUngC,EAAM/G,GACxBA,EAAS+G,IAGXqK,QACErK,MACEmW,WAAY,GACZC,SAAU,IAEZqb,KAAM,IAER9c,QAAS,GAIXhkB,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBAGpCr0B,KAAKyvC,aACH3oC,MAAO8I,MAAO,OAAQC,IAAK,SAG7B7P,KAAKo6B,YACHnF,SAAUN,EAAKh0B,KAAKs0B,SACpBI,OAAQV,EAAKh0B,KAAK00B,QAEpBr1B,KAAK+vB,OACL/vB,KAAKgG,SACLhG,KAAK8D,OAAS,IAEd,IAAIqQ,GAAKnU,IACTA,MAAK81B,UAAY,KACjB91B,KAAK+1B,WAAa,KAGlB/1B,KAAK0vC,eACHz8B,IAAO,SAAU3J,EAAOwK,GACtBK,EAAGw7B,OAAO77B,EAAO7R,QAEnB4S,OAAU,SAAUvL,EAAOwK,GACzBK,EAAGy7B,UAAU97B,EAAO7R,QAEtBoU,OAAU,SAAU/M,EAAOwK,GACzBK,EAAG07B,UAAU/7B,EAAO7R,SAKxBjC,KAAK8vC,gBACH78B,IAAO,SAAU3J,EAAOwK,GACtBK,EAAG47B,aAAaj8B,EAAO7R,QAEzB4S,OAAU,SAAUvL,EAAOwK,GACzBK,EAAG67B,gBAAgBl8B,EAAO7R,QAE5BoU,OAAU,SAAU/M,EAAOwK,GACzBK,EAAG87B,gBAAgBn8B,EAAO7R,SAI9BjC,KAAKiC,SACLjC,KAAKm0B,UACLn0B,KAAKkwC,YAELlwC,KAAKmwC,aACLnwC,KAAKowC,YAAa,EAElBpwC,KAAKqwC,eAGLrwC,KAAK00B,UAEL10B,KAAKkT,WAAWzE,GA/HlB,GAAI1K,GAAS7D,EAAoB,IAC7BS,EAAOT,EAAoB,GAC3BW,EAAUX,EAAoB,GAC9BY,EAAWZ,EAAoB,GAC/BqC,EAAYrC,EAAoB,IAChC0C,EAAQ1C,EAAoB,IAC5B2C,EAAkB3C,EAAoB,IACtCkC,EAAUlC,EAAoB,IAC9BmC,EAAYnC,EAAoB,IAChCoC,EAAYpC,EAAoB,IAChCiC,EAAiBjC,EAAoB,IAGrCowC,EAAY,gBACZC,EAAa,gBAoHjBztC,GAAQqQ,UAAY,GAAI5Q,GAGxBO,EAAQoU,OACN/K,WAAYhK,EACZquC,IAAKpuC,EACLqzB,MAAOnzB,EACP4P,MAAO7P,GAMTS,EAAQqQ,UAAUuhB,QAAU,WAC1B,GAAIpV,GAAQ/N,SAASM,cAAc,MACnCyN,GAAMzX,UAAY,UAClByX,EAAM,oBAAsBtf,KAC5BA,KAAK+vB,IAAIzQ,MAAQA,CAGjB,IAAInT,GAAaoF,SAASM,cAAc,MACxC1F,GAAWtE,UAAY,aACvByX,EAAM7N,YAAYtF,GAClBnM,KAAK+vB,IAAI5jB,WAAaA,CAGtB,IAAIsgC,GAAal7B,SAASM,cAAc,MACxC46B,GAAW5kC,UAAY,aACvByX,EAAM7N,YAAYg7B,GAClBzsC,KAAK+vB,IAAI0c,WAAaA,CAGtB,IAAI3L,GAAOvvB,SAASM,cAAc,MAClCivB,GAAKj5B,UAAY,OACjB7H,KAAK+vB,IAAI+Q,KAAOA,CAGhB,IAAI2M,GAAWl8B,SAASM,cAAc,MACtC47B,GAAS5lC,UAAY,WACrB7H,KAAK+vB,IAAI0d,SAAWA,EAGpBztC,KAAKywC,kBAGL,IAAIC,GAAkB,GAAI7
tC,GAAgB0tC,EAAY,KAAMvwC,KAC5D0wC,GAAgB/I,OAChB3nC,KAAKm0B,OAAOoc,GAAcG,EAM1B1wC,KAAK8D,OAAS,GAAIC,GAAO/D,KAAK20B,KAAK5E,IAAI8H,iBAGvC73B,KAAK8D,OAAOyP,GAAG,eAAgB,SAAUjK,GACnCA,EAAMqnC,SACR3wC,KAAKq+B,SAAS/0B,IAEhBwrB,KAAK90B,OACPA,KAAK8D,OAAOyP,GAAG,WAAYvT,KAAKg+B,aAAalJ,KAAK90B,OAClDA,KAAK8D,OAAOyP,GAAG,UAAYvT,KAAKi+B,QAAQnJ,KAAK90B,OAC7CA,KAAK8D,OAAOyP,GAAG,SAAYvT,KAAKk+B,WAAWpJ,KAAK90B,OAGhDA,KAAK8D,OAAOyP,GAAG,MAAQvT,KAAK4wC,cAAc9b,KAAK90B,OAG/CA,KAAK8D,OAAOyP,GAAG,QAASvT,KAAK6wC,mBAAmB/b,KAAK90B,OAGrDA,KAAK8D,OAAOyP,GAAG,YAAavT,KAAK8wC,WAAWhc,KAAK90B,OAGjDA,KAAK2nC,QAmEP7kC,EAAQqQ,UAAUD,WAAa,SAASzE,GACtC,GAAIA,EAAS,CAEX,GAAIP,IAAU,OAAQ,QAAS,cAAe,UAAW,QAAS,aAAc,aAAc,iBAAkB,WAAW,OAC3HvN,GAAKoF,gBAAgBmI,EAAQlO,KAAKyO,QAASA,GAEvC,UAAYA,KACgB,gBAAnBA,GAAQiL,QACjB1Z,KAAKyO,QAAQiL,OAAOonB,KAAOryB,EAAQiL,OACnC1Z,KAAKyO,QAAQiL,OAAOrK,KAAKmW,WAAa/W,EAAQiL,OAC9C1Z,KAAKyO,QAAQiL,OAAOrK,KAAKoW,SAAWhX,EAAQiL,QAEX,gBAAnBjL,GAAQiL,SACtB/Y,EAAKoF,iBAAiB,QAAS/F,KAAKyO,QAAQiL,OAAQjL,EAAQiL,QACxD,QAAUjL,GAAQiL,SACe,gBAAxBjL,GAAQiL,OAAOrK,MACxBrP,KAAKyO,QAAQiL,OAAOrK,KAAKmW,WAAa/W,EAAQiL,OAAOrK,KACrDrP,KAAKyO,QAAQiL,OAAOrK,KAAKoW,SAAWhX,EAAQiL,OAAOrK,MAEb,gBAAxBZ,GAAQiL,OAAOrK,MAC7B1O,EAAKoF,iBAAiB,aAAc,YAAa/F,KAAKyO,QAAQiL,OAAOrK,KAAMZ,EAAQiL,OAAOrK,SAM9F,YAAcZ,KACgB,iBAArBA,GAAQygC,UACjBlvC,KAAKyO,QAAQygC,SAASC,WAAc1gC,EAAQygC,SAC5ClvC,KAAKyO,QAAQygC,SAAS1H,YAAc/4B,EAAQygC,SAC5ClvC,KAAKyO,QAAQygC,SAASj8B,IAAcxE,EAAQygC,SAC5ClvC,KAAKyO,QAAQygC,SAAS74B,OAAc5H,EAAQygC,UAET,gBAArBzgC,GAAQygC,UACtBvuC,EAAKoF,iBAAiB,aAAc,cAAe,MAAO,UAAW/F,KAAKyO,QAAQygC,SAAUzgC,EAAQygC,UAKxG,IAAI6B,GAAc,SAAW96B,GAC3B,GAAIiD,GAAKzK,EAAQwH,EACjB,IAAIiD,EAAI,CACN,KAAMA,YAAc83B,WAClB,KAAM,IAAIptC,OAAM,UAAYqS,EAAO,uBAAyBA,EAAO,mBAErEjW,MAAKyO,QAAQwH,GAAQiD,IAEtB4b,KAAK90B,OACP,QAAS,WAAY,WAAY,SAAU,YAAYqI,QAAQ0oC,GAGhE/wC,KAAKixC,cAOTnuC,EAAQqQ,UAAU89B,UAAY,WAC5BjxC,KAAKkwC,YACLlwC,KAAKowC,YAAa,GAMpBttC,EAAQqQ,UAAUG,QAAU,WAC1BtT,KAAK0nC,OACL1nC,KAAKi2B,SAAS,MACdj2B,KAAKg2B,UAAU,MAEfh2B,KAAK8D,OAAS,KAEd9D,KAAK20B,KAAO,KACZ30B,KAAKo6B,WAAa,MAMpBt3B,EAAQqQ,UAAUu0B,KAAO,WAEnB1nC,KAAK+vB,IAAIzQ,MAAMzV,YACjB7J,KAAK+vB,IAAIzQ,MAAMzV,WAAWsH,YAAYnR,KAAK+vB,IAAIzQ,OAI7Ctf,KAAK+vB,IAAI+Q,KAAKj3B,YAChB7J,KAAK+vB,IAAI+Q,KAAKj3B,WAAWsH,YAAYnR,KAAK+vB,IAAI+Q,MAI5C9gC,KAAK+vB,IAAI0d,SAAS5jC,YACpB7J,KAAK+vB,IAAI0d,SAAS5jC,WAAWsH,YAAYnR,KAAK+vB,IAAI0d,WAQtD3qC,EAAQqQ,UAAUw0B,KAAO,WAElB3nC,KAAK+vB,IAAIzQ,MAAMzV,YAClB7J,KAAK20B,KAAK5E,IAAI7D,OAAOza,YAAYzR,KAAK+vB,IAAIzQ,OAIvCtf,KAAK+vB,IAAI+Q,KAAKj3B,YACjB7J,KAAK20B,KAAK5E,IAAIgV,mBAAmBtzB,YAAYzR,KAAK+vB,IAAI+Q,MAInD9gC,KAAK+vB,IAAI0d,SAAS5jC,YACrB7J,KAAK20B,KAAK5E,IAAItoB,KAAKgK,YAAYzR,KAAK+vB,IAAI0d,WAW5C3qC,EAAQqQ,UAAUujB,aAAe,SAASvhB,GACxC,GAAI3P,GAAG8nC,EAAIjtC,EAAIgP,CAMf,KAJW7I,QAAP2O,IAAkBA,MACjBlP,MAAMC,QAAQiP,KAAMA,GAAOA,IAG3B3P,EAAI,EAAG8nC,EAAKttC,KAAKmwC,UAAUxqC,OAAY2nC,EAAJ9nC,EAAQA,IAC9CnF,EAAKL,KAAKmwC,UAAU3qC,GACpB6J,EAAOrP,KAAKiC,MAAM5B,GACdgP,GAAMA,EAAK6hC,UAKjB,KADAlxC,KAAKmwC,aACA3qC,EAAI,EAAG8nC,EAAKn4B,EAAIxP,OAAY2nC,EAAJ9nC,EAAQA,IACnCnF,EAAK8U,EAAI3P,GACT6J,EAAOrP,KAAKiC,MAAM5B,GACdgP,IACFrP,KAAKmwC,UAAUnoC,KAAK3H,GACpBgP,EAAK8hC,WASXruC,EAAQqQ,UAAUyjB,aAAe,WAC/B,MAAO52B,MAAKmwC,UAAUn8B,YAOxBlR,EAAQqQ,UAAUi+B,gBAAkB,WAClC,GAAI3b,GAAQz1B,KAAK20B,KAAKc,MAAM2J,WACxB33B,EAAQzH,KAAK20B,KAAKh0B,KAAKs0B,SAASQ,EAAM7lB,OACtCyX,EAAQrnB,KAAK20B,KAAKh0B,KAAKs0B,SAASQ,EAAM5lB,KAEtCsF,IACJ,KAAK,GAAImiB,KAAWt3B,MAAKm0B,OACvB,GAAIn0B,KAAKm0B,OAAOruB,eAAewxB,GAM7B,IAAK,GALDrlB,GAAQjS,KAAKm0B,OAAOmD,GACpB+Z,EAAkBp/B,EAAMm6B,aAInB5mC,EAAI,EAAGA,EAAI6rC,EAAgB1rC,OAAQH,IAAK,CAC/C,GAAI6J,GAAOgiC,EAAg
B7rC,EAEtB6J,GAAK5H,KAAO4f,GAAWhY,EAAK5H,KAAO4H,EAAKkD,MAAQ9K,GACnD0N,EAAInN,KAAKqH,EAAKhP,IAMtB,MAAO8U,IAQTrS,EAAQqQ,UAAUm+B,UAAY,SAASjxC,GAErC,IAAK,GADD8vC,GAAYnwC,KAAKmwC,UACZ3qC,EAAI,EAAG8nC,EAAK6C,EAAUxqC,OAAY2nC,EAAJ9nC,EAAQA,IAC7C,GAAI2qC,EAAU3qC,IAAMnF,EAAI,CACtB8vC,EAAU/nC,OAAO5C,EAAG,EACpB,SASN1C,EAAQqQ,UAAUsO,OAAS,WACzB,GAAI/H,GAAS1Z,KAAKyO,QAAQiL,OACtB+b,EAAQz1B,KAAK20B,KAAKc,MAClBtrB,EAASxJ,EAAKmJ,OAAOK,OACrBsE,EAAUzO,KAAKyO,QACf8lB,EAAc9lB,EAAQ8lB,YACtBkQ,GAAU,EACVnlB,EAAQtf,KAAK+vB,IAAIzQ,MACjB4vB,EAAWzgC,EAAQygC,SAASC,YAAc1gC,EAAQygC,SAAS1H,WAG/DxnC,MAAKgG,MAAM2B,IAAM3H,KAAK20B,KAAKC,SAASjtB,IAAI6K,OAASxS,KAAK20B,KAAKC,SAASxoB,OAAOzE,IAC3E3H,KAAKgG,MAAMyB,KAAOzH,KAAK20B,KAAKC,SAASntB,KAAK8K,MAAQvS,KAAK20B,KAAKC,SAASxoB,OAAO3E,KAG5E6X,EAAMzX,UAAY,WAAaqnC,EAAW,YAAc,IAGxDzK,EAAUzkC,KAAKuxC,gBAAkB9M,CAIjC,IAAI+M,GAAkB/b,EAAM5lB,IAAM4lB,EAAM7lB,MACpC6hC,EAAUD,GAAmBxxC,KAAK0xC,qBAAyB1xC,KAAKgG,MAAMuM,OAASvS,KAAKgG,MAAM2rC,SAC1FF,KAAQzxC,KAAKowC,YAAa,GAC9BpwC,KAAK0xC,oBAAsBF,EAC3BxxC,KAAKgG,MAAM2rC,UAAY3xC,KAAKgG,MAAMuM,KAElC,IAAIs6B,GAAU7sC,KAAKowC,WACfwB,EAAa5xC,KAAK6xC,cAClBC,GACFziC,KAAMqK,EAAOrK,KACbyxB,KAAMpnB,EAAOonB,MAEXiR,GACF1iC,KAAMqK,EAAOrK,KACbyxB,KAAMpnB,EAAOrK,KAAKoW,SAAW,GAE3BjT,EAAS,EACTiiB,EAAY/a,EAAOonB,KAAOpnB,EAAOrK,KAAKoW,QA+B1C,OA5BAzlB,MAAKm0B,OAAOoc,GAAY9uB,OAAOgU,EAAOsc,EAAgBlF,GAGtDlsC,EAAK0H,QAAQrI,KAAKm0B,OAAQ,SAAUliB,GAClC,GAAI+/B,GAAe//B,GAAS2/B,EAAcE,EAAcC,EACpDE,EAAehgC,EAAMwP,OAAOgU,EAAOuc,EAAanF,EACpDpI,GAAUwN,GAAgBxN,EAC1BjyB,GAAUP,EAAMO,SAElBA,EAAStN,KAAKwH,IAAI8F,EAAQiiB,GAC1Bz0B,KAAKowC,YAAa,EAGlB9wB,EAAMrS,MAAMuF,OAAUrI,EAAOqI,GAG7BxS,KAAKgG,MAAMuM,MAAQ+M,EAAM8Q,YACzBpwB,KAAKgG,MAAMwM,OAASA,EAGpBxS,KAAK+vB,IAAI+Q,KAAK7zB,MAAMtF,IAAMwC,EAAuB,OAAfoqB,EAC7Bv0B,KAAK20B,KAAKC,SAASjtB,IAAI6K,OAASxS,KAAK20B,KAAKC,SAASxoB,OAAOzE,IAC1D3H,KAAK20B,KAAKC,SAASjtB,IAAI6K,OAASxS,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,QACxExS,KAAK+vB,IAAI+Q,KAAK7zB,MAAMxF,KAAO,IAG3Bg9B,EAAUzkC,KAAKwkC,cAAgBC,GAUjC3hC,EAAQqQ,UAAU0+B,YAAc,WAC9B,GAAIK,GAA+C,OAA5BlyC,KAAKyO,QAAQ8lB,YAAwB,EAAKv0B,KAAKkwC,SAASvqC,OAAS,EACpFwsC,EAAenyC,KAAKkwC,SAASgC,GAC7BN,EAAa5xC,KAAKm0B,OAAOge,IAAiBnyC,KAAKm0B,OAAOmc,EAE1D,OAAOsB,IAAc,MAQvB9uC,EAAQqQ,UAAUs9B,iBAAmB,WACnC,CAAA,GAEIphC,GAAMkG,EAFN68B,EAAYpyC,KAAKm0B,OAAOmc,EACXtwC,MAAKm0B,OAAOoc,GAG7B,GAAIvwC,KAAK+1B,YAEP,GAAIqc,EAAW,CACbA,EAAU1K,aACH1nC,MAAKm0B,OAAOmc,EAEnB,KAAK/6B,IAAUvV,MAAKiC,MAClB,GAAIjC,KAAKiC,MAAM6D,eAAeyP,GAAS,CACrClG,EAAOrP,KAAKiC,MAAMsT,GAClBlG,EAAKy1B,QAAUz1B,EAAKy1B,OAAOzuB,OAAOhH,EAClC,IAAIioB,GAAUt3B,KAAKqyC,YAAYhjC,EAAKqD,MAChCT,EAAQjS,KAAKm0B,OAAOmD,EACxBrlB,IAASA,EAAMgB,IAAI5D,IAASA,EAAKq4B,aAOvC,KAAK0K,EAAW,CACd,GAAI/xC,GAAK,KACLqS,EAAO,IACX0/B,GAAY,GAAIxvC,GAAMvC,EAAIqS,EAAM1S,MAChCA,KAAKm0B,OAAOmc,GAAa8B,CAEzB,KAAK78B,IAAUvV,MAAKiC,MACdjC,KAAKiC,MAAM6D,eAAeyP,KAC5BlG,EAAOrP,KAAKiC,MAAMsT,GAClB68B,EAAUn/B,IAAI5D,GAIlB+iC,GAAUzK,SAShB7kC,EAAQqQ,UAAUm/B,YAAc,WAC9B,MAAOtyC,MAAK+vB,IAAI0d,UAOlB3qC,EAAQqQ,UAAU8iB,SAAW,SAASh0B,GACpC,GACIkT,GADAhB,EAAKnU,KAELuyC,EAAevyC,KAAK81B,SAGxB,IAAK7zB,EAGA,CAAA,KAAIA,YAAiBpB,IAAWoB,YAAiBnB,IAIpD,KAAM,IAAIuF,WAAU,kDAHpBrG,MAAK81B,UAAY7zB,MAHjBjC,MAAK81B,UAAY,IAoBnB,IAXIyc,IAEF5xC,EAAK0H,QAAQrI,KAAK0vC,cAAe,SAAUpnC,EAAUgB,GACnDipC,EAAa7+B,IAAIpK,EAAOhB,KAI1B6M,EAAMo9B,EAAa18B,SACnB7V,KAAK6vC,UAAU16B,IAGbnV,KAAK81B,UAAW,CAElB,GAAIz1B,GAAKL,KAAKK,EACdM,GAAK0H,QAAQrI,KAAK0vC,cAAe,SAAUpnC,EAAUgB,GACnD6K,EAAG2hB,UAAUviB,GAAGjK,EAAOhB,EAAUjI,KAInC8U,EAAMnV,KAAK81B,UAAUjgB,SACrB7V,KAAK2vC,OAAOx6B,GAGZnV,KAAKywC,qBAQT3tC,EAAQqQ,UAAUq/B,SAAW,WAC3B,MAAOxyC,MAAK81B,WAOdhzB,EAAQqQ,UAAU6iB,UAAY,SAAS7B
,GACrC,GACIhf,GADAhB,EAAKnU,IAgBT,IAZIA,KAAK+1B,aACPp1B,EAAK0H,QAAQrI,KAAK8vC,eAAgB,SAAUxnC,EAAUgB,GACpD6K,EAAG4hB,WAAWniB,YAAYtK,EAAOhB,KAInC6M,EAAMnV,KAAK+1B,WAAWlgB,SACtB7V,KAAK+1B,WAAa,KAClB/1B,KAAKiwC,gBAAgB96B,IAIlBgf,EAGA,CAAA,KAAIA,YAAkBtzB,IAAWszB,YAAkBrzB,IAItD,KAAM,IAAIuF,WAAU,kDAHpBrG,MAAK+1B,WAAa5B,MAHlBn0B,MAAK+1B,WAAa,IASpB,IAAI/1B,KAAK+1B,WAAY,CAEnB,GAAI11B,GAAKL,KAAKK,EACdM,GAAK0H,QAAQrI,KAAK8vC,eAAgB,SAAUxnC,EAAUgB,GACpD6K,EAAG4hB,WAAWxiB,GAAGjK,EAAOhB,EAAUjI,KAIpC8U,EAAMnV,KAAK+1B,WAAWlgB,SACtB7V,KAAK+vC,aAAa56B,GAIpBnV,KAAKywC,mBAGLzwC,KAAKyyC,SAELzyC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,KAO3CtQ,EAAQqQ,UAAUu/B,UAAY,WAC5B,MAAO1yC,MAAK+1B,YAOdjzB,EAAQqQ,UAAU66B,WAAa,SAAS3tC,GACtC,GAAIgP,GAAOrP,KAAK81B,UAAU5gB,IAAI7U,GAC1B22B,EAAUh3B,KAAK81B,UAAUhgB,YAEzBzG,IAEFrP,KAAKyO,QAAQ8gC,SAASlgC,EAAM,SAAUA,GAChCA,GAGF2nB,EAAQ3gB,OAAOhW,MAYvByC,EAAQqQ,UAAUw/B,SAAW,SAAU9b,GACrC,MAAOA,GAAS/vB,MAAQ9G,KAAKyO,QAAQ3H,OAAS+vB,EAAShnB,IAAM,QAAU,QAUzE/M,EAAQqQ,UAAUk/B,YAAc,SAAUxb,GACxC,GAAI/vB,GAAO9G,KAAK2yC,SAAS9b,EACzB,OAAY,cAAR/vB,GAA0CN,QAAlBqwB,EAAS5kB,MAC7Bs+B,EAGCvwC,KAAK+1B,WAAac,EAAS5kB,MAAQq+B,GAS9CxtC,EAAQqQ,UAAUy8B,UAAY,SAASz6B,GACrC,GAAIhB,GAAKnU,IAETmV,GAAI9M,QAAQ,SAAUhI,GACpB,GAAIw2B,GAAW1iB,EAAG2hB,UAAU5gB,IAAI7U,EAAI8T,EAAGs7B,aACnCpgC,EAAO8E,EAAGlS,MAAM5B,GAChByG,EAAOqN,EAAGw+B,SAAS9b,GAEnBvwB,EAAcxD,EAAQoU,MAAMpQ,EAchC,IAZIuI,IAEG/I,GAAiB+I,YAAgB/I,GAMpC6N,EAAGc,YAAY5F,EAAMwnB,IAJrB1iB,EAAGy+B,YAAYvjC,GACfA,EAAO,QAONA,EAAM,CAET,IAAI/I,EAKC,KAEG,IAAID,WAFK,iBAARS,EAEa,4HAIA,sBAAwBA,EAAO,IAVnDuI,GAAO,GAAI/I,GAAYuwB,EAAU1iB,EAAGimB,WAAYjmB,EAAG1F,SACnDY,EAAKhP,GAAKA,EACV8T,EAAGC,SAAS/E,MAalBrP,KAAKyyC,SACLzyC,KAAKowC,YAAa,EAClBpwC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,KAQ3CtQ,EAAQqQ,UAAUw8B,OAAS7sC,EAAQqQ,UAAUy8B,UAO7C9sC,EAAQqQ,UAAU08B,UAAY,SAAS16B,GACrC,GAAI6B,GAAQ,EACR7C,EAAKnU,IACTmV,GAAI9M,QAAQ,SAAUhI,GACpB,GAAIgP,GAAO8E,EAAGlS,MAAM5B,EAChBgP,KACF2H,IACA7C,EAAGy+B,YAAYvjC,MAIf2H,IAEFhX,KAAKyyC,SACLzyC,KAAKowC,YAAa,EAClBpwC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,MAQ7CtQ,EAAQqQ,UAAUs/B,OAAS,WAGzB9xC,EAAK0H,QAAQrI,KAAKm0B,OAAQ,SAAUliB,GAClCA,EAAMwD,WASV3S,EAAQqQ,UAAU68B,gBAAkB,SAAS76B,GAC3CnV,KAAK+vC,aAAa56B,IAQpBrS,EAAQqQ,UAAU48B,aAAe,SAAS56B,GACxC,GAAIhB,GAAKnU,IAETmV,GAAI9M,QAAQ,SAAUhI,GACpB,GAAIyrC,GAAY33B,EAAG4hB,WAAW7gB,IAAI7U,GAC9B4R,EAAQkC,EAAGggB,OAAO9zB,EAEtB,IAAK4R,EA6BHA,EAAM+F,QAAQ8zB,OA7BJ,CAEV,GAAIzrC,GAAMiwC,GAAajwC,GAAMkwC,EAC3B,KAAM,IAAI3sC,OAAM,qBAAuBvD,EAAK,qBAG9C,IAAIwyC,GAAetsC,OAAO8H,OAAO8F,EAAG1F,QACpC9N,GAAK2E,OAAOutC,GACVrgC,OAAQ,OAGVP,EAAQ,GAAIrP,GAAMvC,EAAIyrC,EAAW33B,GACjCA,EAAGggB,OAAO9zB,GAAM4R,CAGhB,KAAK,GAAIsD,KAAUpB,GAAGlS,MACpB,GAAIkS,EAAGlS,MAAM6D,eAAeyP,GAAS,CACnC,GAAIlG,GAAO8E,EAAGlS,MAAMsT,EAChBlG,GAAKqD,KAAKT,OAAS5R,GACrB4R,EAAMgB,IAAI5D,GAKhB4C,EAAMwD,QACNxD,EAAM01B,UAQV3nC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,KAQ3CtQ,EAAQqQ,UAAU88B,gBAAkB,SAAS96B,GAC3C,GAAIgf,GAASn0B,KAAKm0B,MAClBhf,GAAI9M,QAAQ,SAAUhI,GACpB,GAAI4R,GAAQkiB,EAAO9zB,EAEf4R,KACFA,EAAMy1B,aACCvT,GAAO9zB,MAIlBL,KAAKixC,YAELjxC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,KAQ3CtQ,EAAQqQ,UAAUo+B,aAAe,WAC/B,GAAIvxC,KAAK+1B,WAAY,CAEnB,GAAIma,GAAWlwC,KAAK+1B,WAAWlgB,QAC7BJ,MAAOzV,KAAKyO,QAAQugC,aAGlB/P,GAAWt+B,EAAKiG,WAAWspC,EAAUlwC,KAAKkwC,SAC9C,IAAIjR,EAAS,CAEX,GAAI9K,GAASn0B,KAAKm0B,MAClB+b,GAAS7nC,QAAQ,SAAUivB,GACzBnD,EAAOmD,GAASoQ,SAIlBwI,EAAS7nC,QAAQ,SAAUivB,GACzBnD,EAAOmD,GAASqQ,SAGlB3nC,KAAKkwC,SAAWA,EAGlB,MAAOjR,GAGP,OAAO,GASXn8B,EAAQqQ,UAAUiB,SAAW,SAAS/E,GACpCrP,KAAKiC,MAAMoN,EAAKhP,IAAMgP,CAGtB,IAAIioB,GAAUt3B,KAAKqyC,YAAYhjC,EAAKqD,MAChCT,EAAQjS,KAAKm0B,OAAOmD,EACpBrlB,IA
AOA,EAAMgB,IAAI5D,IASvBvM,EAAQqQ,UAAU8B,YAAc,SAAS5F,EAAMwnB,GAC7C,GAAIic,GAAazjC,EAAKqD,KAAKT,KAM3B,IAHA5C,EAAK2I,QAAQ6e,GAGTic,GAAczjC,EAAKqD,KAAKT,MAAO,CACjC,GAAI8gC,GAAW/yC,KAAKm0B,OAAO2e,EACvBC,IAAUA,EAAS18B,OAAOhH,EAE9B,IAAIioB,GAAUt3B,KAAKqyC,YAAYhjC,EAAKqD,MAChCT,EAAQjS,KAAKm0B,OAAOmD,EACpBrlB,IAAOA,EAAMgB,IAAI5D,KAUzBvM,EAAQqQ,UAAUy/B,YAAc,SAASvjC,GAEvCA,EAAKq4B,aAGE1nC,MAAKiC,MAAMoN,EAAKhP,GAGvB,IAAI8H,GAAQnI,KAAKmwC,UAAUxpC,QAAQ0I,EAAKhP,GAC3B,KAAT8H,GAAanI,KAAKmwC,UAAU/nC,OAAOD,EAAO,GAG9CkH,EAAKy1B,QAAUz1B,EAAKy1B,OAAOzuB,OAAOhH,IASpCvM,EAAQqQ,UAAU6/B,qBAAuB,SAASxqC,GAGhD,IAAK,GAFD0lC,MAEK1oC,EAAI,EAAGA,EAAIgD,EAAM7C,OAAQH,IAC5BgD,EAAMhD,YAAclD,IACtB4rC,EAASlmC,KAAKQ,EAAMhD,GAGxB,OAAO0oC,IAaTprC,EAAQqQ,UAAUkrB,SAAW,SAAU/0B,GAErCtJ,KAAKqwC,YAAYhhC,KAAOvM,EAAQmwC,eAAe3pC,GAC/CtJ,KAAKqwC,YAAY6C,aAAe5pC,EAAMI,OAAOwpC,eAAgB,EAC7DlzC,KAAKqwC,YAAY8C,cAAgB7pC,EAAMI,OAAOypC,gBAAiB,EAC/DnzC,KAAKqwC,YAAY+C,UAAY,MAQ/BtwC,EAAQqQ,UAAU6qB,aAAe,SAAU10B,GACzC,GAAKtJ,KAAKyO,QAAQygC,SAASC,YAAenvC,KAAKyO,QAAQygC,SAAS1H,YAAhE,CAIA,GAEIxhC,GAFAqJ,EAAOrP,KAAKqwC,YAAYhhC,MAAQ,KAChC8E,EAAKnU,IAGT,IAAIqP,GAAQA,EAAKgkC,SAAU,CACzB,GAAIH,GAAelzC,KAAKqwC,YAAY6C,aAChCC,EAAgBnzC,KAAKqwC,YAAY8C,aAEjCD,IACFltC,GACEqJ,KAAM6jC,EACNI,SAAUhqC,EAAM4iB,OAAOna,GAGrBoC,EAAG1F,QAAQygC,SAASC,aACtBnpC,EAAM4J,MAAQP,EAAKqD,KAAK9C,MAAM5I,WAE5BmN,EAAG1F,QAAQygC,SAAS1H,aAClB,SAAWn4B,GAAKqD,OAAM1M,EAAMiM,MAAQ5C,EAAKqD,KAAKT,OAGpDjS,KAAKqwC,YAAY+C,WAAaptC,IAEvBmtC,GACPntC,GACEqJ,KAAM8jC,EACNG,SAAUhqC,EAAM4iB,OAAOna,GAGrBoC,EAAG1F,QAAQygC,SAASC,aACtBnpC,EAAM6J,IAAMR,EAAKqD,KAAK7C,IAAI7I,WAExBmN,EAAG1F,QAAQygC,SAAS1H,aAClB,SAAWn4B,GAAKqD,OAAM1M,EAAMiM,MAAQ5C,EAAKqD,KAAKT,OAGpDjS,KAAKqwC,YAAY+C,WAAaptC,IAG9BhG,KAAKqwC,YAAY+C,UAAYpzC,KAAK42B,eAAevpB,IAAI,SAAUhN,GAC7D,GAAIgP,GAAO8E,EAAGlS,MAAM5B,GAChB2F,GACFqJ,KAAMA,EACNikC,SAAUhqC,EAAM4iB,OAAOna,EAWzB,OARIoC,GAAG1F,QAAQygC,SAASC,aAClB,SAAW9/B,GAAKqD,OAAM1M,EAAM4J,MAAQP,EAAKqD,KAAK9C,MAAM5I,WACpD,OAASqI,GAAKqD,OAAQ1M,EAAM6J,IAAMR,EAAKqD,KAAK7C,IAAI7I,YAElDmN,EAAG1F,QAAQygC,SAAS1H,aAClB,SAAWn4B,GAAKqD,OAAM1M,EAAMiM,MAAQ5C,EAAKqD,KAAKT,OAG7CjM,IAIXsD,EAAMq8B,kBACNr8B,EAAMD,oBASVvG,EAAQqQ,UAAU8qB,QAAU,SAAU30B,GAGpC,GAFAA,EAAMD,iBAEFrJ,KAAKqwC,YAAY+C,UAAW,CAC9B,GAAIj/B,GAAKnU,KACLg1B,EAAOh1B,KAAK20B,KAAKh0B,KAAKq0B,MAAQ,KAC9BpL,EAAU5pB,KAAK20B,KAAK5E,IAAIrwB,KAAK2tC,WAAartC,KAAK20B,KAAKC,SAASntB,KAAK8K,KAGtEvS,MAAKqwC,YAAY+C,UAAU/qC,QAAQ,SAAUrC,GAC3C,GAAIutC,MACAzZ,EAAU3lB,EAAGwgB,KAAKh0B,KAAK00B,OAAO/rB,EAAM4iB,OAAOna,EAAI6X,GAC/C4pB,EAAUr/B,EAAGwgB,KAAKh0B,KAAK00B,OAAOrvB,EAAMstC,SAAW1pB,GAC/CD,EAASmQ,EAAU0Z,CAEvB,IAAI,SAAWxtC,GAAO,CACpB,GAAI4J,GAAQ,GAAItL,MAAK0B,EAAM4J,MAAQ+Z,EACnC4pB,GAAS3jC,MAAQolB,EAAOA,EAAKplB,GAASA,EAGxC,GAAI,OAAS5J,GAAO,CAClB,GAAI6J,GAAM,GAAIvL,MAAK0B,EAAM6J,IAAM8Z,EAC/B4pB,GAAS1jC,IAAMmlB,EAAOA,EAAKnlB,GAAOA,EAGpC,GAAI,SAAW7J,GAAO,CAEpB,GAAIiM,GAAQnP,EAAQ2wC,gBAAgBnqC,EACpCiqC,GAASthC,MAAQA,GAASA,EAAMqlB,QAIlC,GAAIT,GAAWl2B,EAAK2E,UAAWU,EAAMqJ,KAAKqD,KAAM6gC,EAChDp/B,GAAG1F,QAAQ+gC,SAAS3Y,EAAU,SAAUA,GAClCA,GACF1iB,EAAGu/B,iBAAiB1tC,EAAMqJ,KAAMwnB,OAKtC72B,KAAKowC,YAAa,EAClBpwC,KAAK20B,KAAKE,QAAQjH,KAAK,UAEvBtkB,EAAMq8B,oBAUV7iC,EAAQqQ,UAAUugC,iBAAmB,SAASrkC,EAAMrJ,GAE9C,SAAWA,KAAOqJ,EAAKqD,KAAK9C,MAAQ5J,EAAM4J,OAC1C,OAAS5J,KAASqJ,EAAKqD,KAAK7C,IAAQ7J,EAAM6J,KAC1C,SAAW7J,IAASqJ,EAAKqD,KAAKT,OAASjM,EAAMiM,OAC/CjS,KAAK2zC,aAAatkC,EAAMrJ,EAAMiM,QAUlCnP,EAAQqQ,UAAUwgC,aAAe,SAAStkC,EAAMioB,GAC9C,GAAIrlB,GAAQjS,KAAKm0B,OAAOmD,EACxB,IAAIrlB,GAASA,EAAMqlB,SAAWjoB,EAAKqD,KAAKT,MAAO,CAC7C,GAAI8gC,GAAW1jC,EAAKy1B,MACpBiO,GAAS18B,OAAOhH,GAChB0jC,EAASt9B,QACTxD,EAAMgB,IAAI5D,G
ACV4C,EAAMwD,QAENpG,EAAKqD,KAAKT,MAAQA,EAAMqlB,UAS5Bx0B,EAAQqQ,UAAU+qB,WAAa,SAAU50B,GACvC,GAAItJ,KAAKqwC,YAAY+C,UAAW,CAE9B,GAAIQ,MACAz/B,EAAKnU,KACLg3B,EAAUh3B,KAAK81B,UAAUhgB,aAEzBs9B,EAAYpzC,KAAKqwC,YAAY+C,SACjCpzC,MAAKqwC,YAAY+C,UAAY,KAC7BA,EAAU/qC,QAAQ,SAAUrC,GAC1B,GAAI3F,GAAK2F,EAAMqJ,KAAKhP,GAChBw2B,EAAW1iB,EAAG2hB,UAAU5gB,IAAI7U,EAAI8T,EAAGs7B,aAEnCxQ,GAAU,CACV,UAAWj5B,GAAMqJ,KAAKqD,OACxBusB,EAAWj5B,EAAM4J,OAAS5J,EAAMqJ,KAAKqD,KAAK9C,MAAM5I,UAChD6vB,EAASjnB,MAAQjP,EAAKkG,QAAQb,EAAMqJ,KAAKqD,KAAK9C,MACtConB,EAAQrkB,SAAS7L,MAAQkwB,EAAQrkB,SAAS7L,KAAK8I,OAAS,SAE9D,OAAS5J,GAAMqJ,KAAKqD,OACtBusB,EAAUA,GAAaj5B,EAAM6J,KAAO7J,EAAMqJ,KAAKqD,KAAK7C,IAAI7I,UACxD6vB,EAAShnB,IAAMlP,EAAKkG,QAAQb,EAAMqJ,KAAKqD,KAAK7C,IACpCmnB,EAAQrkB,SAAS7L,MAAQkwB,EAAQrkB,SAAS7L,KAAK+I,KAAO,SAE5D,SAAW7J,GAAMqJ,KAAKqD,OACxBusB,EAAUA,GAAaj5B,EAAMiM,OAASjM,EAAMqJ,KAAKqD,KAAKT,MACtD4kB,EAAS5kB,MAAQjM,EAAMqJ,KAAKqD,KAAKT,OAI/BgtB,GACF9qB,EAAG1F,QAAQ6gC,OAAOzY,EAAU,SAAUA,GAChCA,GAEFA,EAASG,EAAQnkB,UAAYxS,EAC7BuzC,EAAQ5rC,KAAK6uB,KAIb1iB,EAAGu/B,iBAAiB1tC,EAAMqJ,KAAMrJ,GAEhCmO,EAAGi8B,YAAa,EAChBj8B,EAAGwgB,KAAKE,QAAQjH,KAAK,eAOzBgmB,EAAQjuC,QACVqxB,EAAQniB,OAAO++B,GAGjBtqC,EAAMq8B,oBASV7iC,EAAQqQ,UAAUy9B,cAAgB,SAAUtnC,GAC1C,GAAKtJ,KAAKyO,QAAQwgC,WAAlB,CAEA,GAAI4E,GAAWvqC,EAAMwqC,UAAYxqC,EAAMwqC,SAASD,QAC5CE,EAAWzqC,EAAMwqC,UAAYxqC,EAAMwqC,SAASC,QAChD,IAAIF,GAAWE,EAEb,WADA/zC,MAAK6wC,mBAAmBvnC,EAI1B,IAAI0qC,GAAeh0C,KAAK42B,eAEpBvnB,EAAOvM,EAAQmwC,eAAe3pC,GAC9B6mC,EAAY9gC,GAAQA,EAAKhP,MAC7BL,MAAK02B,aAAayZ,EAElB,IAAI8D,GAAej0C,KAAK42B,gBAIpBqd,EAAatuC,OAAS,GAAKquC,EAAaruC,OAAS,IACnD3F,KAAK20B,KAAKE,QAAQjH,KAAK,UACrB3rB,MAAOgyC,MAUbnxC,EAAQqQ,UAAU29B,WAAa,SAAUxnC,GACvC,GAAKtJ,KAAKyO,QAAQwgC,YACbjvC,KAAKyO,QAAQygC,SAASj8B,IAA3B,CAEA,GAAIkB,GAAKnU,KACLg1B,EAAOh1B,KAAK20B,KAAKh0B,KAAKq0B,MAAQ,KAC9B3lB,EAAOvM,EAAQmwC,eAAe3pC,EAElC,IAAI+F,EAAM,CAIR,GAAIwnB,GAAW1iB,EAAG2hB,UAAU5gB,IAAI7F,EAAKhP,GACrCL,MAAKyO,QAAQ4gC,SAASxY,EAAU,SAAUA,GACpCA,GACF1iB,EAAG2hB,UAAUhgB,aAAajB,OAAOgiB,SAIlC,CAEH,GAAIqd,GAAOvzC,EAAK2G,gBAAgBtH,KAAK+vB,IAAIzQ,OACrCvN,EAAIzI,EAAM4iB,OAAOna,EAAImiC,EACrBtkC,EAAQ5P,KAAK20B,KAAKh0B,KAAK00B,OAAOtjB,GAC9BoiC,GACFvkC,MAAOolB,EAAOA,EAAKplB,GAASA,EAC5BggB,QAAS,WAIX,IAA0B,UAAtB5vB,KAAKyO,QAAQ3H,KAAkB,CACjC,GAAI+I,GAAM7P,KAAK20B,KAAKh0B,KAAK00B,OAAOtjB,EAAI/R,KAAKgG,MAAMuM,MAAQ,EACvD4hC,GAAQtkC,IAAMmlB,EAAOA,EAAKnlB,GAAOA,EAGnCskC,EAAQn0C,KAAK81B,UAAUjjB,UAAYlS,EAAKqE,YAExC,IAAIiN,GAAQnP,EAAQ2wC,gBAAgBnqC,EAChC2I,KACFkiC,EAAQliC,MAAQA,EAAMqlB,SAIxBt3B,KAAKyO,QAAQ2gC,MAAM+E,EAAS,SAAU9kC,GAChCA,GACF8E,EAAG2hB,UAAUhgB,aAAa7C,IAAI5D,QAYtCvM,EAAQqQ,UAAU09B,mBAAqB,SAAUvnC,GAC/C,GAAKtJ,KAAKyO,QAAQwgC,WAAlB,CAEA,GAAIkB,GACA9gC,EAAOvM,EAAQmwC,eAAe3pC,EAElC,IAAI+F,EAAM,CAER8gC,EAAYnwC,KAAK42B,cAEjB,IAAImd,GAAWzqC,EAAMwqC,UAAYxqC,EAAMwqC,SAASC,WAAY,CAC5D,IAAIA,EAAU,CAIZ5D,EAAUnoC,KAAKqH,EAAKhP,GACpB,IAAIo1B,GAAQ3yB,EAAQsxC,cAAcp0C,KAAK81B,UAAU5gB,IAAIi7B,EAAWnwC,KAAKyvC,aAGrEU,KACA,KAAK,GAAI9vC,KAAML,MAAKiC,MAClB,GAAIjC,KAAKiC,MAAM6D,eAAezF,GAAK,CACjC,GAAIg0C,GAAQr0C,KAAKiC,MAAM5B,GACnBuP,EAAQykC,EAAM3hC,KAAK9C,MACnBC,EAA0BrJ,SAAnB6tC,EAAM3hC,KAAK7C,IAAqBwkC,EAAM3hC,KAAK7C,IAAMD,CAExDA,IAAS6lB,EAAM3pB,KAAO+D,GAAO4lB,EAAM/oB,KACrCyjC,EAAUnoC,KAAKqsC,EAAMh0C,SAKxB,CAEH,GAAI8H,GAAQgoC,EAAUxpC,QAAQ0I,EAAKhP,GACtB,KAAT8H,EAEFgoC,EAAUnoC,KAAKqH,EAAKhP,IAIpB8vC,EAAU/nC,OAAOD,EAAO,GAI5BnI,KAAK02B,aAAayZ,GAElBnwC,KAAK20B,KAAKE,QAAQjH,KAAK,UACrB3rB,MAAOjC,KAAK42B,oBAWlB9zB,EAAQsxC,cAAgB,SAASte,GAC/B,GAAIppB,GAAM,KACNZ,EAAM,IAmBV,OAjBAgqB,GAAUztB,QAAQ,SAAUqK,IACf,MAAP5G,GAAe4G,EAAK9C,MAAQ9D,KAC9BA,EAAM4G,EAAK9C,OAGGpJ,QAAZkM,EAAK7C,KACI
,MAAPnD,GAAegG,EAAK7C,IAAMnD,KAC5BA,EAAMgG,EAAK7C,MAIF,MAAPnD,GAAegG,EAAK9C,MAAQlD,KAC9BA,EAAMgG,EAAK9C,UAMf9D,IAAKA,EACLY,IAAKA,IAUT5J,EAAQmwC,eAAiB,SAAS3pC,GAEhC,IADA,GAAII,GAASJ,EAAMI,OACZA,GAAQ,CACb,GAAIA,EAAO5D,eAAe,iBACxB,MAAO4D,GAAO,gBAEhBA,GAASA,EAAOG,WAGlB,MAAO,OAST/G,EAAQ2wC,gBAAkB,SAASnqC,GAEjC,IADA,GAAII,GAASJ,EAAMI,OACZA,GAAQ,CACb,GAAIA,EAAO5D,eAAe,kBACxB,MAAO4D,GAAO,iBAEhBA,GAASA,EAAOG,WAGlB,MAAO,OAST/G,EAAQwxC,kBAAoB,SAAShrC,GAEnC,IADA,GAAII,GAASJ,EAAMI,OACZA,GAAQ,CACb,GAAIA,EAAO5D,eAAe,oBACxB,MAAO4D,GAAO,mBAEhBA,GAASA,EAAOG,WAGlB,MAAO,OAGThK,EAAOD,QAAUkD,GAKb,SAASjD,EAAQD,EAASM,GAS9B,QAAS6C,GAAO4xB,EAAMlmB,EAAS8lC,EAAM1O,GACnC7lC,KAAK20B,KAAOA,EACZ30B,KAAKq0B,gBACH3lB,SAAS,EACTs3B,OAAO,EACPwO,SAAU,GACVC,YAAa,EACbhtC,MACEihB,SAAS,EACT9E,SAAU,YAEZyD,OACEqB,SAAS,EACT9E,SAAU,aAGd5jB,KAAKu0C,KAAOA,EACZv0C,KAAKyO,QAAU9N,EAAK2E,UAAUtF,KAAKq0B,gBACnCr0B,KAAK6lC,iBAAmBA,EAExB7lC,KAAKinC,eACLjnC,KAAK+vB,OACL/vB,KAAKm0B,UACLn0B,KAAKmnC,eAAiB,EACtBnnC,KAAK00B,UAEL10B,KAAKkT,WAAWzE,GAjClB,GAAI9N,GAAOT,EAAoB,GAC3BU,EAAUV,EAAoB,GAC9BqC,EAAYrC,EAAoB,GAkCpC6C,GAAOoQ,UAAY,GAAI5Q,GAEvBQ,EAAOoQ,UAAUsD,MAAQ,WACvBzW,KAAKm0B,UACLn0B,KAAKmnC,eAAiB,GAGxBpkC,EAAOoQ,UAAUm0B,SAAW,SAAS7e,EAAO8e,GAErCvnC,KAAKm0B,OAAOruB,eAAe2iB,KAC9BzoB,KAAKm0B,OAAO1L,GAAS8e,GAEvBvnC,KAAKmnC,gBAAkB,GAGzBpkC,EAAOoQ,UAAUq0B,YAAc,SAAS/e,EAAO8e,GAC7CvnC,KAAKm0B,OAAO1L,GAAS8e,GAGvBxkC,EAAOoQ,UAAUs0B,YAAc,SAAShf,GAClCzoB,KAAKm0B,OAAOruB,eAAe2iB,WACtBzoB,MAAKm0B,OAAO1L,GACnBzoB,KAAKmnC,gBAAkB,IAI3BpkC,EAAOoQ,UAAUuhB,QAAU,WACzB10B,KAAK+vB,IAAIzQ,MAAQ/N,SAASM,cAAc,OACxC7R,KAAK+vB,IAAIzQ,MAAMzX,UAAY,SAC3B7H,KAAK+vB,IAAIzQ,MAAMrS,MAAM2W,SAAW,WAChC5jB,KAAK+vB,IAAIzQ,MAAMrS,MAAMtF,IAAM,OAC3B3H,KAAK+vB,IAAIzQ,MAAMrS,MAAM26B,QAAU,QAE/B5nC,KAAK+vB,IAAI2kB,SAAWnjC,SAASM,cAAc,OAC3C7R,KAAK+vB,IAAI2kB,SAAS7sC,UAAY,aAC9B7H,KAAK+vB,IAAI2kB,SAASznC,MAAM2W,SAAW,WACnC5jB,KAAK+vB,IAAI2kB,SAASznC,MAAMtF,IAAM,MAE9B3H,KAAK4lC,IAAMr0B,SAASC,gBAAgB,6BAA6B,OACjExR,KAAK4lC,IAAI34B,MAAM2W,SAAW,WAC1B5jB,KAAK4lC,IAAI34B,MAAMtF,IAAM,MACrB3H,KAAK4lC,IAAI34B,MAAMsF,MAAQvS,KAAKyO,QAAQ+lC,SAAW,EAAI,KACnDx0C,KAAK4lC,IAAI34B,MAAMuF,OAAS,OAExBxS,KAAK+vB,IAAIzQ,MAAM7N,YAAYzR,KAAK4lC,KAChC5lC,KAAK+vB,IAAIzQ,MAAM7N,YAAYzR,KAAK+vB,IAAI2kB,WAMtC3xC,EAAOoQ,UAAUu0B,KAAO,WAElB1nC,KAAK+vB,IAAIzQ,MAAMzV,YACjB7J,KAAK+vB,IAAIzQ,MAAMzV,WAAWsH,YAAYnR,KAAK+vB,IAAIzQ,QAQnDvc,EAAOoQ,UAAUw0B,KAAO,WAEjB3nC,KAAK+vB,IAAIzQ,MAAMzV,YAClB7J,KAAK20B,KAAK5E,IAAI7D,OAAOza,YAAYzR,KAAK+vB,IAAIzQ,QAI9Cvc,EAAOoQ,UAAUD,WAAa,SAASzE,GACrC,GAAIP,IAAU,UAAU,cAAc,QAAQ,OAAO,QACrDvN,GAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASA,IAGjD1L,EAAOoQ,UAAUsO,OAAS,WACxB,GAAI0mB,GAAe,CACnB,KAAK,GAAI7Q,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,IACvI6Q,IAKN,IAAuC,GAAnCnoC,KAAKyO,QAAQzO,KAAKu0C,MAAM7rB,SAA2C,GAAvB1oB,KAAKmnC,gBAA+C,GAAxBnnC,KAAKyO,QAAQC,SAAoC,GAAhBy5B,EAC3GnoC,KAAK0nC,WAEF,CAqBH,GApBA1nC,KAAK2nC,OACmC,YAApC3nC,KAAKyO,QAAQzO,KAAKu0C,MAAM3wB,UAA8D,eAApC5jB,KAAKyO,QAAQzO,KAAKu0C,MAAM3wB,UAC5E5jB,KAAK+vB,IAAIzQ,MAAMrS,MAAMxF,KAAO,MAC5BzH,KAAK+vB,IAAIzQ,MAAMrS,MAAMqb,UAAY,OACjCtoB,KAAK+vB,IAAI2kB,SAASznC,MAAMqb,UAAY,OACpCtoB,KAAK+vB,IAAI2kB,SAASznC,MAAMxF,KAAQzH,KAAKyO,QAAQ+lC,SAAW,GAAM,KAC9Dx0C,KAAK+vB,IAAI2kB,SAASznC,MAAMoa,MAAQ,GAChCrnB,KAAK4lC,IAAI34B,MAAMxF,KAAO,MACtBzH,KAAK4lC,IAAI34B,MAAMoa,MAAQ,KAGvBrnB,KAAK+vB,IAAIzQ,MAAMrS,MAAMoa,MAAQ,MAC7BrnB,KAAK+vB,IAAIzQ,MAAMrS,MAAMqb,UAAY,QACjCtoB,KAAK+vB,IAAI2kB,SAASznC,MAAMqb,UAAY,QACpCtoB,KAAK+vB,IAAI2kB,SAASznC,MAAMoa,MAASrnB,KAAKyO,QAAQ+lC,SAA
W,GAAM,KAC/Dx0C,KAAK+vB,IAAI2kB,SAASznC,MAAMxF,KAAO,GAC/BzH,KAAK4lC,IAAI34B,MAAMoa,MAAQ,MACvBrnB,KAAK4lC,IAAI34B,MAAMxF,KAAO,IAGgB,YAApCzH,KAAKyO,QAAQzO,KAAKu0C,MAAM3wB,UAA8D,aAApC5jB,KAAKyO,QAAQzO,KAAKu0C,MAAM3wB,SAC5E5jB,KAAK+vB,IAAIzQ,MAAMrS,MAAMtF,IAAM,EAAIzD,OAAOlE,KAAK20B,KAAK5E,IAAI7D,OAAOjf,MAAMtF,IAAI6C,QAAQ,KAAK,KAAO,KACzFxK,KAAK+vB,IAAIzQ,MAAMrS,MAAMqW,OAAS,OAE3B,CACH,GAAIqxB,GAAmB30C,KAAK20B,KAAKC,SAAS1I,OAAO1Z,OAASxS,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,MAC7FxS,MAAK+vB,IAAIzQ,MAAMrS,MAAMqW,OAAS,EAAIqxB,EAAmBzwC,OAAOlE,KAAK20B,KAAK5E,IAAI7D,OAAOjf,MAAMtF,IAAI6C,QAAQ,KAAK,KAAO,KAC/GxK,KAAK+vB,IAAIzQ,MAAMrS,MAAMtF,IAAM,GAGH,GAAtB3H,KAAKyO,QAAQu3B,OACfhmC,KAAK+vB,IAAIzQ,MAAMrS,MAAMsF,MAAQvS,KAAK+vB,IAAI2kB,SAAStkB,YAAc,GAAK,KAClEpwB,KAAK+vB,IAAI2kB,SAASznC,MAAMoa,MAAQ,GAChCrnB,KAAK+vB,IAAI2kB,SAASznC,MAAMxF,KAAO,GAC/BzH,KAAK4lC,IAAI34B,MAAMsF,MAAQ,QAGvBvS,KAAK+vB,IAAIzQ,MAAMrS,MAAMsF,MAAQvS,KAAKyO,QAAQ+lC,SAAW,GAAKx0C,KAAK+vB,IAAI2kB,SAAStkB,YAAc,GAAK,KAC/FpwB,KAAK40C,kBAGP,IAAIhlB,GAAU,EACd,KAAK,GAAI0H,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,KACvI1H,GAAW5vB,KAAKm0B,OAAOmD,GAAS1H,QAAU,UAIhD5vB,MAAK+vB,IAAI2kB,SAASzwB,UAAY2L,EAC9B5vB,KAAK+vB,IAAI2kB,SAASznC,MAAMsjB,WAAe,IAAOvwB,KAAKyO,QAAQ+lC,SAAYx0C,KAAKyO,QAAQgmC,YAAe,OAIvG1xC,EAAOoQ,UAAUyhC,gBAAkB,WACjC,GAAI50C,KAAK+vB,IAAIzQ,MAAMzV,WAAY,CAC7BjJ,EAAQiQ,gBAAgB7Q,KAAKinC,YAC7B,IAAIjjB,GAAUza,OAAOsrC,iBAAiB70C,KAAK+vB,IAAIzQ,OAAOw1B,WAClD/M,EAAa7jC,OAAO8f,EAAQxZ,QAAQ,KAAK,KACzCuH,EAAIg2B,EACJ1B,EAAYrmC,KAAKyO,QAAQ+lC,SACzB1M,EAAa,IAAO9nC,KAAKyO,QAAQ+lC,SACjCxiC,EAAI+1B,EAAa,GAAMD,EAAa,CAExC9nC,MAAK4lC,IAAI34B,MAAMsF,MAAQ8zB,EAAY,EAAI0B,EAAa,IAEpD,KAAK,GAAIzQ,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,KACvIt3B,KAAKm0B,OAAOmD,GAAS0Q,SAASj2B,EAAGC,EAAGhS,KAAKinC,YAAajnC,KAAK4lC,IAAKS,EAAWyB,GAC3E91B,GAAK81B,EAAa9nC,KAAKyO,QAAQgmC,aAKrC7zC,GAAQsQ,gBAAgBlR,KAAKinC,eAIjCpnC,EAAOD,QAAUmD,GAKb,SAASlD,EAAQD,EAASM,GAqB9B,QAAS8C,GAAU2xB,EAAMlmB,GACvBzO,KAAKK,GAAKM,EAAKqE,aACfhF,KAAK20B,KAAOA,EAEZ30B,KAAKq0B,gBACHuX,iBAAkB,OAClBmJ,aAAc,UACd7+B,MAAM,EACN8+B,UAAU,EACVC,YAAa,QACb1J,QACE78B,SAAS,EACT6lB,YAAa,UAEftnB,MAAO,OACPioC,UACE3iC,MAAO,GACP4iC,cAAe,UACfpG,MAAO,UAEThE,YACEr8B,SAAS,EACTs8B,gBAAiB,cACjBC,MAAO,IAET94B,YACEzD,SAAS,EACT2D,KAAM,EACNpF,MAAO,UAETmoC,UACEtP,iBAAiB,EACjBC,iBAAiB,EACjBC,OAAO,EACPzzB,MAAO,OACPmW,SAAS,EACT6S,YAAY,EACZD,aACE7zB,MAAOqE,IAAItF,OAAWkG,IAAIlG,QAC1B6gB,OAAQvb,IAAItF,OAAWkG,IAAIlG,UAkB/B6uC,QACE3mC,SAAS,EACTs3B,OAAO,EACPv+B,MACEihB,SAAS,EACT9E,SAAU,YAEZyD,OACEqB,SAAS,EACT9E,SAAU,cAGduQ,QACEqD,gBAKJx3B,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBACpCr0B,KAAK+vB,OACL/vB,KAAKgG,SACLhG,KAAK8D,OAAS,KACd9D,KAAKm0B,UACLn0B,KAAKs1C,oBAAqB,EAC1Bt1C,KAAKu1C,iBAAkB,EACvBv1C,KAAKw1C,yBAA0B,CAE/B,IAAIrhC,GAAKnU,IACTA,MAAK81B,UAAY,KACjB91B,KAAK+1B,WAAa,KAGlB/1B,KAAK0vC,eACHz8B,IAAO,SAAU3J,EAAOwK,GACtBK,EAAGw7B,OAAO77B,EAAO7R,QAEnB4S,OAAU,SAAUvL,EAAOwK,GACzBK,EAAGy7B,UAAU97B,EAAO7R,QAEtBoU,OAAU,SAAU/M,EAAOwK,GACzBK,EAAG07B,UAAU/7B,EAAO7R,SAKxBjC,KAAK8vC,gBACH78B,IAAO,SAAU3J,EAAOwK,GACtBK,EAAG47B,aAAaj8B,EAAO7R,QAEzB4S,OAAU,SAAUvL,EAAOwK,GACzBK,EAAG67B,gBAAgBl8B,EAAO7R,QAE5BoU,OAAU,SAAU/M,EAAOwK,GACzBK,EAAG87B,gBAAgBn8B,EAAO7R,SAI9BjC,KAAKiC,SACLjC,KAAKmwC,aACLnwC,KAAKy1C,UAAYz1C,KAAK20B,KAAKc,MAAM7lB,MACjC5P,KAAKqwC,eAELrwC,KAAKinC,eACLjnC,KAAKkT,WAAWzE,GAChBzO,KAAKwqC,0BAA4B,GACjCxqC,KAAK01C,QAAU,EAC
f11C,KAAK20B,KAAKE,QAAQthB,GAAG,eAAgB,WACnCY,EAAGshC,UAAYthC,EAAGwgB,KAAKc,MAAM7lB,MAC7BuE,EAAGyxB,IAAI34B,MAAMxF,KAAO9G,EAAKmJ,OAAOK,QAAQgK,EAAGnO,MAAMuM,OACjD4B,EAAGsN,OAAOlhB,KAAK4T,GAAG,KAIpBnU,KAAK00B,UACL10B,KAAKgsC,WAAapG,IAAK5lC,KAAK4lC,IAAKqB,YAAajnC,KAAKinC,YAAax4B,QAASzO,KAAKyO,QAAS0lB,OAAQn0B,KAAKm0B,QACpGn0B,KAAK20B,KAAKE,QAAQjH,KAAK,UAvJzB,GAAIjtB,GAAOT,EAAoB,GAC3BU,EAAUV,EAAoB,GAC9BW,EAAUX,EAAoB,GAC9BY,EAAWZ,EAAoB,GAC/BqC,EAAYrC,EAAoB,IAChCwC,EAAWxC,EAAoB,IAC/ByC,EAAazC,EAAoB,IACjC6C,EAAS7C,EAAoB,IAC7By1C,EAAoBz1C,EAAoB,IAExCowC,EAAY,eAiJhBttC,GAAUmQ,UAAY,GAAI5Q,GAK1BS,EAAUmQ,UAAUuhB,QAAU,WAC5B,GAAIpV,GAAQ/N,SAASM,cAAc,MACnCyN,GAAMzX,UAAY,YAClB7H,KAAK+vB,IAAIzQ,MAAQA,EAGjBtf,KAAK4lC,IAAMr0B,SAASC,gBAAgB,6BAA6B,OACjExR,KAAK4lC,IAAI34B,MAAM2W,SAAW,WAC1B5jB,KAAK4lC,IAAI34B,MAAMuF,QAAU,GAAKxS,KAAKyO,QAAQwmC,aAAazqC,QAAQ,KAAK,IAAM,KAC3ExK,KAAK4lC,IAAI34B,MAAM26B,QAAU,QACzBtoB,EAAM7N,YAAYzR,KAAK4lC,KAGvB5lC,KAAKyO,QAAQ2mC,SAAS7gB,YAAc,OACpCv0B,KAAK41C,UAAY,GAAIlzC,GAAS1C,KAAK20B,KAAM30B,KAAKyO,QAAQ2mC,SAAUp1C,KAAK4lC,IAAK5lC,KAAKyO,QAAQ0lB,QAEvFn0B,KAAKyO,QAAQ2mC,SAAS7gB,YAAc,QACpCv0B,KAAK61C,WAAa,GAAInzC,GAAS1C,KAAK20B,KAAM30B,KAAKyO,QAAQ2mC,SAAUp1C,KAAK4lC,IAAK5lC,KAAKyO,QAAQ0lB,cACjFn0B,MAAKyO,QAAQ2mC,SAAS7gB,YAG7Bv0B,KAAK81C,WAAa,GAAI/yC,GAAO/C,KAAK20B,KAAM30B,KAAKyO,QAAQ4mC,OAAQ,OAAQr1C,KAAKyO,QAAQ0lB,QAClFn0B,KAAK+1C,YAAc,GAAIhzC,GAAO/C,KAAK20B,KAAM30B,KAAKyO,QAAQ4mC,OAAQ,QAASr1C,KAAKyO,QAAQ0lB,QAEpFn0B,KAAK2nC,QAOP3kC,EAAUmQ,UAAUD,WAAa,SAASzE,GACxC,GAAIA,EAAS,CACX,GAAIP,IAAU,WAAW,eAAe,SAAS,cAAc,mBAAmB,QAAQ,WAAW,WAAW,OAAO,SAC3F1H,UAAxBiI,EAAQwmC,aAAgDzuC,SAAnBiI,EAAQ+D,QAAsEhM,SAA9CxG,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,QAC1GxS,KAAKu1C,iBAAkB,EACvBv1C,KAAKw1C,yBAA0B,GAEsBhvC,SAA9CxG,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,QAAgDhM,SAAxBiI,EAAQwmC,aACtErqC,UAAU6D,EAAQwmC,YAAc,IAAIzqC,QAAQ,KAAK,KAAOxK,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,SAC7FxS,KAAKu1C,iBAAkB,GAG3B50C,EAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASA,GAC/C9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,cACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,cACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,UACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,UAEpCA,EAAQs8B,YACuB,gBAAtBt8B,GAAQs8B,YACbt8B,EAAQs8B,WAAWC,kBACqB,WAAtCv8B,EAAQs8B,WAAWC,gBACrBhrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,EAEa,WAAtCx8B,EAAQs8B,WAAWC,gBAC1BhrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,GAGhCjrC,KAAKyO,QAAQs8B,WAAWC,gBAAkB,cAC1ChrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,KAMpCjrC,KAAK41C,WACkBpvC,SAArBiI,EAAQ2mC,WACVp1C,KAAK41C,UAAU1iC,WAAWlT,KAAKyO,QAAQ2mC,UACvCp1C,KAAK61C,WAAW3iC,WAAWlT,KAAKyO,QAAQ2mC,WAIxCp1C,KAAK81C,YACgBtvC,SAAnBiI,EAAQ4mC,SACVr1C,KAAK81C,WAAW5iC,WAAWlT,KAAKyO,QAAQ4mC,QACxCr1C,KAAK+1C,YAAY7iC,WAAWlT,KAAKyO,QAAQ4mC,SAIzCr1C,KAAKm0B,OAAOruB,eAAewqC,IAC7BtwC,KAAKm0B,OAAOmc,GAAWp9B,WAAWzE,GAKlCzO,KAAK+vB,IAAIzQ,OACXtf,KAAKyhB,QAAO,IAOhBze,EAAUmQ,UAAUu0B,KAAO,WAErB1nC,KAAK+vB,IAAIzQ,MAAMzV,YACjB7J,KAAK+vB,IAAIzQ,MAAMzV,WAAWsH,YAAYnR,KAAK+vB,IAAIzQ,QASnDtc,EAAUmQ,UAAUw0B,KAAO,WAEpB3nC,KAAK+vB,IAAIzQ,MAAMzV,YAClB7J,KAAK20B,KAAK5E,IAAI7D,OAAOza,YAAYzR,KAAK+vB,IAAIzQ,QAS9Ctc,EAAUmQ,UAAU8iB,SAAW,SAASh0B,GACtC,GACEkT,GADEhB,EAAKnU,KAEPuyC,EAAevyC,KAAK81B,SAGtB,IAAK7zB,EAGA,CAAA,KAAIA,YAAiBpB,IAAWoB,YAAiBnB,IAIpD,KAAM,IAAIuF,WAAU,kDAHpBrG,MAAK81B,UAAY7zB,MAHjBjC,MAAK81B,UAAY,IAoBnB,IAXIyc,IAEF5xC,EAAK0H,QAAQrI,KAAK0vC,cAAe,SAAUpnC,EAAUgB,GACnDipC,EAAa7+B,IAAIpK,EAAOhB,KAI1B6M,EAAMo9B,EAAa18B,SACnB7V,KAAK6vC,UAAU16B,IAGbnV,KAAK81B,UAAW,CAElB,GAAIz1B,GAAKL,KAAKK,EACdM,GAAK0H,QAAQrI,KAAK0vC,cAAe,SAAUpnC,EAAUgB,GACnD6K,EAAG2hB,UAAUviB,GAAGjK,EAAOhB,EAAUjI,KAInC8U,EAAMnV,KAAK81B,UAAUjgB,SACrB7V,KAAK2vC,OAAOx6B,GAEdnV,KAAKywC,mBAELzwC,KAAKyhB,QAA
O,IAQdze,EAAUmQ,UAAU6iB,UAAY,SAAS7B,GACvC,GACIhf,GADAhB,EAAKnU,IAgBT,IAZIA,KAAK+1B,aACPp1B,EAAK0H,QAAQrI,KAAK8vC,eAAgB,SAAUxnC,EAAUgB,GACpD6K,EAAG4hB,WAAWniB,YAAYtK,EAAOhB,KAInC6M,EAAMnV,KAAK+1B,WAAWlgB,SACtB7V,KAAK+1B,WAAa,KAClB/1B,KAAKiwC,gBAAgB96B,IAIlBgf,EAGA,CAAA,KAAIA,YAAkBtzB,IAAWszB,YAAkBrzB,IAItD,KAAM,IAAIuF,WAAU,kDAHpBrG,MAAK+1B,WAAa5B,MAHlBn0B,MAAK+1B,WAAa,IASpB,IAAI/1B,KAAK+1B,WAAY,CAEnB,GAAI11B,GAAKL,KAAKK,EACdM,GAAK0H,QAAQrI,KAAK8vC,eAAgB,SAAUxnC,EAAUgB,GACpD6K,EAAG4hB,WAAWxiB,GAAGjK,EAAOhB,EAAUjI,KAIpC8U,EAAMnV,KAAK+1B,WAAWlgB,SACtB7V,KAAK+vC,aAAa56B,GAEpBnV,KAAK4vC,aASP5sC,EAAUmQ,UAAUy8B,UAAY,WAC9B5vC,KAAKywC,mBACLzwC,KAAKg2C,sBAELh2C,KAAKyhB,QAAO,IAEdze,EAAUmQ,UAAUw8B,OAAkB,SAAUx6B,GAAMnV,KAAK4vC,UAAUz6B,IACrEnS,EAAUmQ,UAAU08B,UAAkB,SAAU16B,GAAMnV,KAAK4vC,UAAUz6B,IACrEnS,EAAUmQ,UAAU68B,gBAAmB,SAAUE,GAC/C,IAAK,GAAI1qC,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAAK,CACxC,GAAIyM,GAAQjS,KAAK+1B,WAAW7gB,IAAIg7B,EAAS1qC,GACzCxF,MAAKi2C,aAAahkC,EAAOi+B,EAAS1qC,IAIpCxF,KAAKyhB,QAAO,IAEdze,EAAUmQ,UAAU48B,aAAe,SAAUG,GAAWlwC,KAAKgwC,gBAAgBE,IAQ7EltC,EAAUmQ,UAAU88B,gBAAkB,SAAUC,GAC9C,IAAK,GAAI1qC,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/BxF,KAAKm0B,OAAOruB,eAAeoqC,EAAS1qC,MACmB,SAArDxF,KAAKm0B,OAAO+b,EAAS1qC,IAAIiJ,QAAQm9B,kBACnC5rC,KAAK61C,WAAWpO,YAAYyI,EAAS1qC,IACrCxF,KAAK+1C,YAAYtO,YAAYyI,EAAS1qC,IACtCxF,KAAK+1C,YAAYt0B,WAGjBzhB,KAAK41C,UAAUnO,YAAYyI,EAAS1qC,IACpCxF,KAAK81C,WAAWrO,YAAYyI,EAAS1qC,IACrCxF,KAAK81C,WAAWr0B,gBAEXzhB,MAAKm0B,OAAO+b,EAAS1qC,IAGhCxF,MAAKywC,mBAELzwC,KAAKyhB,QAAO,IAWdze,EAAUmQ,UAAU8iC,aAAe,SAAUhkC,EAAOqlB,GAC7Ct3B,KAAKm0B,OAAOruB,eAAewxB,IAY9Bt3B,KAAKm0B,OAAOmD,GAASziB,OAAO5C,GACyB,SAAjDjS,KAAKm0B,OAAOmD,GAAS7oB,QAAQm9B,kBAC/B5rC,KAAK61C,WAAWrO,YAAYlQ,EAASt3B,KAAKm0B,OAAOmD,IACjDt3B,KAAK+1C,YAAYvO,YAAYlQ,EAASt3B,KAAKm0B,OAAOmD,MAGlDt3B,KAAK41C,UAAUpO,YAAYlQ,EAASt3B,KAAKm0B,OAAOmD,IAChDt3B,KAAK81C,WAAWtO,YAAYlQ,EAASt3B,KAAKm0B,OAAOmD,OAlBnDt3B,KAAKm0B,OAAOmD,GAAW,GAAI30B,GAAWsP,EAAOqlB,EAASt3B,KAAKyO,QAASzO,KAAKwqC,0BACpB,SAAjDxqC,KAAKm0B,OAAOmD,GAAS7oB,QAAQm9B,kBAC/B5rC,KAAK61C,WAAWvO,SAAShQ,EAASt3B,KAAKm0B,OAAOmD,IAC9Ct3B,KAAK+1C,YAAYzO,SAAShQ,EAASt3B,KAAKm0B,OAAOmD,MAG/Ct3B,KAAK41C,UAAUtO,SAAShQ,EAASt3B,KAAKm0B,OAAOmD,IAC7Ct3B,KAAK81C,WAAWxO,SAAShQ,EAASt3B,KAAKm0B,OAAOmD,MAclDt3B,KAAK81C,WAAWr0B,SAChBzhB,KAAK+1C,YAAYt0B,UASnBze,EAAUmQ,UAAU6iC,oBAAsB,WACxC,GAAsB,MAAlBh2C,KAAK81B,UAAmB,CAC1B,GACIwB,GADA4e,IAEJ,KAAK5e,IAAWt3B,MAAKm0B,OACfn0B,KAAKm0B,OAAOruB,eAAewxB,KAC7B4e,EAAc5e,MAGlB,KAAK,GAAI/hB,KAAUvV,MAAK81B,UAAUljB,MAChC,GAAI5S,KAAK81B,UAAUljB,MAAM9M,eAAeyP,GAAS,CAC/C,GAAIlG,GAAOrP,KAAK81B,UAAUljB,MAAM2C,EAChC,IAAkC/O,SAA9B0vC,EAAc7mC,EAAK4C,OACrB,KAAM,IAAIrO,OAAM,4IAElByL,GAAK0C,EAAIpR,EAAKkG,QAAQwI,EAAK0C,EAAE,QAC7BmkC,EAAc7mC,EAAK4C,OAAOjK,KAAKqH,GAGnC,IAAKioB,IAAWt3B,MAAKm0B,OACfn0B,KAAKm0B,OAAOruB,eAAewxB,IAC7Bt3B,KAAKm0B,OAAOmD,GAASrB,SAASigB,EAAc5e,MAYpDt0B,EAAUmQ,UAAUs9B,iBAAmB,WACrC,GAAIzwC,KAAK81B,WAA+B,MAAlB91B,KAAK81B,UAAmB,CAC5C,GAAIqgB,GAAmB,CACvB,KAAK,GAAI5gC,KAAUvV,MAAK81B,UAAUljB,MAChC,GAAI5S,KAAK81B,UAAUljB,MAAM9M,eAAeyP,GAAS,CAC/C,GAAIlG,GAAOrP,KAAK81B,UAAUljB,MAAM2C,EACpB/O,SAAR6I,IACEA,EAAKvJ,eAAe,SACHU,SAAf6I,EAAK4C,QACP5C,EAAK4C,MAAQq+B,GAIfjhC,EAAK4C,MAAQq+B,EAEf6F,EAAmB9mC,EAAK4C,OAASq+B,EAAY6F,EAAmB,EAAIA,GAK1E,GAAwB,GAApBA,QACKn2C,MAAKm0B,OAAOmc,GACnBtwC,KAAK81C,WAAWrO,YAAY6I,GAC5BtwC,KAAK+1C,YAAYtO,YAAY6I,GAC7BtwC,KAAK41C,UAAUnO,YAAY6I,GAC3BtwC,KAAK61C,WAAWpO,YAAY6I,OAEzB,CACH,GAAIr+B,IAAS5R,GAAIiwC,EAAW1gB,QAAS5vB,KAAKyO,QAAQsmC,aAClD/0C,MAAKi2C,aAAahkC,EAAOq+B,eAIpBtwC,MAAKm0B,OAAOmc,GACnBtwC,KAAK81C,WAAWrO,YAAY6I,GAC5BtwC,KAAK+1C,YAAYt
O,YAAY6I,GAC7BtwC,KAAK41C,UAAUnO,YAAY6I,GAC3BtwC,KAAK61C,WAAWpO,YAAY6I,EAG9BtwC,MAAK81C,WAAWr0B,SAChBzhB,KAAK+1C,YAAYt0B,UAQnBze,EAAUmQ,UAAUsO,OAAS,SAAS20B,GACpC,GAAI3R,IAAU,CAGdzkC,MAAKgG,MAAMuM,MAAQvS,KAAK+vB,IAAIzQ,MAAM8Q,YAClCpwB,KAAKgG,MAAMwM,OAASxS,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,OAGhChM,SAAnBxG,KAAK2xC,WAA2B3xC,KAAKgG,MAAMuM,QAC7C6jC,GAAmB,GAIrB3R,EAAUzkC,KAAKwkC,cAAgBC,CAG/B,IAAI+M,GAAkBxxC,KAAK20B,KAAKc,MAAM5lB,IAAM7P,KAAK20B,KAAKc,MAAM7lB,MACxD6hC,EAAUD,GAAmBxxC,KAAK0xC,mBA6BtC,IA5BA1xC,KAAK0xC,oBAAsBF,EAKZ,GAAX/M,IACFzkC,KAAK4lC,IAAI34B,MAAMsF,MAAQ5R,EAAKmJ,OAAOK,OAAO,EAAEnK,KAAKgG,MAAMuM,OACvDvS,KAAK4lC,IAAI34B,MAAMxF,KAAO9G,EAAKmJ,OAAOK,QAAQnK,KAAKgG,MAAMuM,QAGN,KAA1CvS,KAAKyO,QAAQ+D,OAAS,IAAI7L,QAAQ,MAA8C,GAAhC3G,KAAKw1C,2BACxDx1C,KAAKu1C,iBAAkB,IAKC,GAAxBv1C,KAAKu1C,iBACHv1C,KAAKyO,QAAQwmC,aAAej1C,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,OAAS,OAC1ExS,KAAKyO,QAAQwmC,YAAcj1C,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,OAAS,KACvExS,KAAK4lC,IAAI34B,MAAMuF,OAASxS,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,OAAS,MAEtExS,KAAKu1C,iBAAkB,GAGvBv1C,KAAK4lC,IAAI34B,MAAMuF,QAAU,GAAKxS,KAAKyO,QAAQwmC,aAAazqC,QAAQ,KAAK,IAAM,KAI9D,GAAXi6B,GAA6B,GAAVgN,GAA6C,GAA3BzxC,KAAKs1C,oBAAkD,GAApBc,EAC1E3R,EAAUzkC,KAAKq2C,gBAAkB5R,MAIjC,IAAsB,GAAlBzkC,KAAKy1C,UAAgB,CACvB,GAAI9rB,GAAS3pB,KAAK20B,KAAKc,MAAM7lB,MAAQ5P,KAAKy1C,UACtChgB,EAAQz1B,KAAK20B,KAAKc,MAAM5lB,IAAM7P,KAAK20B,KAAKc,MAAM7lB,KAClD,IAAwB,GAApB5P,KAAKgG,MAAMuM,MAAY,CACzB,GAAI+jC,GAAmBt2C,KAAKgG,MAAMuM,MAAMkjB,EACpC7L,EAAUD,EAAS2sB,CACvBt2C,MAAK4lC,IAAI34B,MAAMxF,MAASzH,KAAKgG,MAAMuM,MAAQqX,EAAW,MAO5D,MAFA5pB,MAAK81C,WAAWr0B,SAChBzhB,KAAK+1C,YAAYt0B,SACVgjB,GAQTzhC,EAAUmQ,UAAUkjC,aAAe,WAGjC,GADAz1C,EAAQiQ,gBAAgB7Q,KAAKinC,aACL,GAApBjnC,KAAKgG,MAAMuM,OAAgC,MAAlBvS,KAAK81B,UAAmB,CACnD,GAAI7jB,GAAOzM,EACP+wC,KACAC,KACAC,KACAC,GAAe,EAGfxG,IACJ,KAAK,GAAI5Y,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KAC7BrlB,EAAQjS,KAAKm0B,OAAOmD,GACC,GAAjBrlB,EAAMyW,SAAgEliB,SAA5CxG,KAAKyO,QAAQ0lB,OAAOqD,WAAWF,IAAqE,GAA3Ct3B,KAAKyO,QAAQ0lB,OAAOqD,WAAWF,IACpH4Y,EAASloC,KAAKsvB,GAIpB,IAAI4Y,EAASvqC,OAAS,EAAG,CAEvB,GAAIgxC,GAAU32C,KAAK20B,KAAKh0B,KAAK40B,cAAcv1B,KAAK20B,KAAKC,SAASl1B,KAAK6S,OAC/DqkC,EAAU52C,KAAK20B,KAAKh0B,KAAK40B,aAAa,EAAIv1B,KAAK20B,KAAKC,SAASl1B,KAAK6S,OAClEwjB,IAQJ,KANA/1B,KAAK62C,iBAAiB3G,EAAUna,EAAY4gB,EAASC,GAGrD52C,KAAK82C,eAAe5G,EAAUna,GAGzBvwB,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/B+wC,EAAsBrG,EAAS1qC,IAAMxF,KAAK+2C,qBAAqBhhB,EAAWma,EAAS1qC,IAIrFxF,MAAKg3C,YAAY9G,EAAUqG,EAAuBE,GAIlDC,EAAe12C,KAAKi3C,aAAa/G,EAAUuG,EAC3C,IAAIS,GAAa,CACjB,IAAoB,GAAhBR,GAAwB12C,KAAK01C,QAAUwB,EAKzC,MAJAt2C,GAAQsQ,gBAAgBlR,KAAKinC,aAC7BjnC,KAAKs1C,oBAAqB,EAC1Bt1C,KAAK01C,UACL11C,KAAK20B,KAAKE,QAAQjH,KAAK,WAChB,CAUP,KAPI5tB,KAAK01C,QAAUwB,GACjBve,QAAQhF,IAAI,6EAEd3zB,KAAK01C,QAAU,EACf11C,KAAKs1C,oBAAqB,EAGrB9vC,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/ByM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IAC7BgxC,EAAmBtG,EAAS1qC,IAAMxF,KAAKm3C,qBAAqBphB,EAAWma,EAAS1qC,IAAKyM,EAIvF,KAAKzM,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/ByM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IACF,OAAvByM,EAAMxD,QAAQxB,OAChBgF,EAAM85B,KAAKyK,EAAmBtG,EAAS1qC,IAAKyM,EAAOjS,KAAKgsC,UAG5D2J,GAAkB5J,KAAKmE,EAAUsG,EAAoBx2C,KAAKgsC,YAOhE,MADAprC,GAAQsQ,gBAAgBlR,KAAKinC,cACtB,GAiBTjkC,EAAUmQ,UAAU0jC,iBAAmB,SAAU3G,EAAUna,EAAY4gB,EAASC,GAC9E,GAAI3kC,GAAOzM,EAAGomB,EAAGvc,CACjB,IAAI6gC,EAASvqC,OAAS,EACpB,IAAKH,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAAK,CACpCyM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IAC7BuwB,EAAWma,EAAS1qC,MACpB,IAAI4xC,GAAgBrhB,EAAWma,EAAS1qC,GAExC;GAA0B,GAAtByM,EAAMxD,QAAQyH,KAAc,CAC9B,GAAImhC,GAAQnyC,KAAKwH,IAAI,EAAG/L,EAAK4O,kBAAkB0C,EAAM6jB,UAAW6gB,EAAS,I
AAK,UAC9E,KAAK/qB,EAAIyrB,EAAOzrB,EAAI3Z,EAAM6jB,UAAUnwB,OAAQimB,IAE1C,GADAvc,EAAO4C,EAAM6jB,UAAUlK,GACVplB,SAAT6I,EAAoB,CACtB,GAAIA,EAAK0C,EAAI6kC,EAAS,CACpBQ,EAAcpvC,KAAKqH,EACnB,OAGA+nC,EAAcpvC,KAAKqH,QAMzB,KAAKuc,EAAI,EAAGA,EAAI3Z,EAAM6jB,UAAUnwB,OAAQimB,IACtCvc,EAAO4C,EAAM6jB,UAAUlK,GACVplB,SAAT6I,GACEA,EAAK0C,EAAI4kC,GAAWtnC,EAAK0C,EAAI6kC,GAC/BQ,EAAcpvC,KAAKqH,KAgBjCrM,EAAUmQ,UAAU2jC,eAAiB,SAAU5G,EAAUna,GACvD,GAAI9jB,EACJ,IAAIi+B,EAASvqC,OAAS,EACpB,IAAK,GAAIH,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAEnC,GADAyM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IACC,GAA1ByM,EAAMxD,QAAQumC,SAAkB,CAClC,GAAIoC,GAAgBrhB,EAAWma,EAAS1qC,GACxC,IAAI4xC,EAAczxC,OAAS,EAAG,CAC5B,GAAI2xC,GAAY,EACZC,EAAiBH,EAAczxC,OAI/B6xC,EAAYx3C,KAAK20B,KAAKh0B,KAAKw0B,eAAeiiB,EAAcA,EAAczxC,OAAS,GAAGoM,GAAK/R,KAAK20B,KAAKh0B,KAAKw0B,eAAeiiB,EAAc,GAAGrlC,GACtI0lC,EAAiBF,EAAiBC,CACtCF,GAAYpyC,KAAK4G,IAAI5G,KAAKwyC,KAAK,GAAMH,GAAiBryC,KAAKwH,IAAI,EAAGxH,KAAKwoB,MAAM+pB,IAG7E,KAAK,GADDE,MACK/rB,EAAI,EAAO2rB,EAAJ3rB,EAAoBA,GAAK0rB,EACvCK,EAAY3vC,KAAKovC,EAAcxrB,GAGjCmK,GAAWma,EAAS1qC,IAAMmyC,KAgBpC30C,EAAUmQ,UAAU6jC,YAAc,SAAU9G,EAAUna,EAAY0gB,GAChE,GAAI3K,GAAW75B,EAAOzM,EAGlBiJ,EAFAmpC,KACAC,IAEJ,IAAI3H,EAASvqC,OAAS,EAAG,CACvB,IAAKH,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/BsmC,EAAY/V,EAAWma,EAAS1qC,IAChCiJ,EAAUzO,KAAKm0B,OAAO+b,EAAS1qC,IAAIiJ,QAC/Bq9B,EAAUnmC,OAAS,IACrBsM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IAES,SAAlCiJ,EAAQymC,SAASC,eAA6C,OAAjB1mC,EAAQxB,MACvB,QAA5BwB,EAAQm9B,iBAA6BgM,EAAuBA,EAAoB5jC,OAAO/B,EAAM45B,UAAUC,IAClE+L,EAAuBA,EAAqB7jC,OAAO/B,EAAM45B,UAAUC,IAG5G2K,EAAYvG,EAAS1qC,IAAMyM,EAAM45B,UAAUC,EAAUoE,EAAS1qC,IAMpEmwC,GAAkBmC,oBAAoBF,EAAsBnB,EAAavG,EAAU,iBAAmB,QACtGyF,EAAkBmC,oBAAoBD,EAAsBpB,EAAavG,EAAU,kBAAmB,WAW1GltC,EAAUmQ,UAAU8jC,aAAe,SAAU/G,EAAUuG,GACrD,GAGoEsB,GAAQC,EAHxEvT,GAAU,EACVwT,GAAgB,EAChBC,GAAiB,EACjBC,EAAU,IAAKC,EAAW,IAAKC,EAAU,KAAMC,EAAW,IAE9D,IAAIpI,EAASvqC,OAAS,EAAG,CAEvB,IAAK,GAAIH,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAAK,CACxC,GAAIyM,GAAQjS,KAAKm0B,OAAO+b,EAAS1qC,GAC7ByM,IAA2C,SAAlCA,EAAMxD,QAAQm9B,kBACzBqM,GAAgB,EAChBE,EAAU,EACVE,EAAU,GAEHpmC,GAASA,EAAMxD,QAAQm9B,mBAC9BsM,GAAiB,EACjBE,EAAW,EACXE,EAAW,GAKf,IAAK,GAAI9yC,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/BixC,EAAY3wC,eAAeoqC,EAAS1qC,KAClCixC,EAAYvG,EAAS1qC,IAAI+yC,UAAW,IACtCR,EAAStB,EAAYvG,EAAS1qC,IAAIsG,IAClCksC,EAASvB,EAAYvG,EAAS1qC,IAAIkH,IAEe,SAA7C+pC,EAAYvG,EAAS1qC,IAAIomC,kBAC3BqM,GAAgB,EAChBE,EAAUA,EAAUJ,EAASA,EAASI,EACtCE,EAAoBL,EAAVK,EAAmBL,EAASK,IAGtCH,GAAiB,EACjBE,EAAWA,EAAWL,EAASA,EAASK,EACxCE,EAAsBN,EAAXM,EAAoBN,EAASM,GAM3B,IAAjBL,GACFj4C,KAAK41C,UAAUriB,SAAS4kB,EAASE,GAEb,GAAlBH,GACFl4C,KAAK61C,WAAWtiB,SAAS6kB,EAAUE,GAoCvC,MAjCA7T,GAAUzkC,KAAKw4C,qBAAqBP,EAAgBj4C,KAAK41C,YAAenR,EACxEA,EAAUzkC,KAAKw4C,qBAAqBN,EAAgBl4C,KAAK61C,aAAepR,EAElD,GAAlByT,GAA2C,GAAjBD,GAC5Bj4C,KAAK41C,UAAU6C,WAAY,EAC3Bz4C,KAAK61C,WAAW4C,WAAY,IAG5Bz4C,KAAK41C,UAAU6C,WAAY,EAC3Bz4C,KAAK61C,WAAW4C,WAAY,GAE9Bz4C,KAAK61C,WAAW7O,QAAUiR,EACI,GAA1Bj4C,KAAK61C,WAAW7O,QACWhnC,KAAK41C,UAAU7O,WAAtB,GAAlBmR,EAAqDl4C,KAAK61C,WAAWtjC,MAChB,EAEzDkyB,EAAUzkC,KAAK41C,UAAUn0B,UAAYgjB,EACrCzkC,KAAK61C,WAAWhP,iBAAmB7mC,KAAK41C,UAAUhP,WAClD5mC,KAAK61C,WAAW/O,aAAe9mC,KAAK41C,UAAU9O,aAC9CrC,EAAUzkC,KAAK61C,WAAWp0B,UAAYgjB,GAGtCA,EAAUzkC,KAAK61C,WAAWp0B,UAAYgjB,EAIE,IAAtCyL,EAASvpC,QAAQ,mBACnBupC,EAAS9nC,OAAO8nC,EAASvpC,QAAQ,kBAAkB,GAEV,IAAvCupC,EAASvpC,QAAQ,oBACnBupC,EAAS9nC,OAAO8nC,EAASvpC,QAAQ,mBAAmB,GAG/C89B,GAYTzhC,EAAUmQ,UAAUqlC,qBAAuB,SAAUE,EAAU5X,GAC7D,GAAI7B,IAAU,CAad,OAZgB,IAAZyZ,EACE5X,EAAK/Q,IAAIzQ,MAAMzV,YAA6B,GAAfi3B,EAAK5H,SACpC4H,EAAK4G,OACLzI,GAAU,GAIP6B,EAAK/Q,IAAIzQ,MAAMzV,YAA6B,GAAfi3B,E
rB,EAAQs4C,UAAUj3C,SAAW,KACpCtJ,SAArCiI,EAAQs4C,UAAU+L,iBAAgCrkD,EAAQs4C,UAAU+L,eAAiB,qBAEzF9yD,MAAK+yD,YAAYtkD,KAcnBvL,EAAQiQ,UAAU4/C,YAAc,SAAUtkD,GACxC,GAAgBjI,SAAZiI,EAEF,YADAA,KAKFzO,MAAKsrD,cACiB,GAAlB78C,EAAQukD,SACVhzD,KAAKwiD,eAAiB/zC,EAAQokD,aAC9B7yD,KAAKyiD,mBAAqBh0C,EAAQkb,QAIb,GAAnB3pB,KAAKmiD,YACPniD,KAAKizD,kBAAkB,GAGzBjzD,KAAKoiD,YAAcpiD,KAAKyqD,YACxBzqD,KAAKsiD,kBAAoBtiD,KAAK6qD,kBAC9B7qD,KAAKqiD,YAAc5zC,EAAQwO,MAI3Bjd,KAAKgd,UAAUhd,KAAKqiD,YACpB,IAAI6Q,GAAalzD,KAAKosD,aAAar6C,EAAG,GAAM/R,KAAKsf,MAAMC,OAAOC,YAAaxN,EAAG,GAAMhS,KAAKsf,MAAMC,OAAOsF,eAClGsuC,GACFphD,EAAGmhD,EAAWnhD,EAAItD,EAAQmV,SAAS7R,EACnCC,EAAGkhD,EAAWlhD,EAAIvD,EAAQmV,SAAS5R,EAErChS,MAAKuiD,mBACHxwC,EAAG/R,KAAKsiD,kBAAkBvwC,EAAIohD,EAAmBphD,EAAI/R,KAAKqiD,YAAc5zC,EAAQkb,OAAO5X,EACvFC,EAAGhS,KAAKsiD,kBAAkBtwC,EAAImhD,EAAmBnhD,EAAIhS,KAAKqiD,YAAc5zC,EAAQkb,OAAO3X,GAIvD,GAA9BvD,EAAQs4C,UAAUj3C,SACO,MAAvB9P,KAAKwiD,gBACPxiD,KAAKozD,eAAiBpzD,KAAK6iD,QAC3B7iD,KAAK6iD,QAAU7iD,KAAKqzD,gBAGpBrzD,KAAKgd,UAAUhd,KAAKqiD,aACpBriD,KAAKsjD,gBAAgBtjD,KAAKuiD,kBAAkBxwC,EAAG/R,KAAKuiD,kBAAkBvwC,GACtEhS,KAAK6iD,YAIP7iD,KAAKiiD,eAAiB,GAAKjiD,KAAKo8C,kBAAoB3tC,EAAQs4C,UAAUj3C,SAAW,OAAU,EAAI9P,KAAKo8C,kBACpGp8C,KAAKkiD,wBAA0BzzC,EAAQs4C,UAAU+L,eACjD9yD,KAAKozD,eAAiBpzD,KAAK6iD,QAC3B7iD,KAAK6iD,QAAU7iD,KAAKizD,kBACpBjzD,KAAK6iD,UACL7iD,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAQT1M,EAAQiQ,UAAUkgD,cAAgB,WAChC,GAAIT,IAAgB7gD,EAAG/R,KAAKg9C,MAAMh9C,KAAKwiD,gBAAgBzwC,EAAGC,EAAGhS,KAAKg9C,MAAMh9C,KAAKwiD,gBAAgBxwC,GACzFkhD,EAAalzD,KAAKosD,aAAar6C,EAAG,GAAM/R,KAAKsf,MAAMC,OAAOC,YAAaxN,EAAG,GAAMhS,KAAKsf,MAAMC,OAAOsF,eAClGsuC,GACFphD,EAAGmhD,EAAWnhD,EAAI6gD,EAAa7gD,EAC/BC,EAAGkhD,EAAWlhD,EAAI4gD,EAAa5gD,GAE7BswC,EAAoBtiD,KAAK6qD,kBACzBtI,GACFxwC,EAAGuwC,EAAkBvwC,EAAIohD,EAAmBphD,EAAI/R,KAAKid,MAAQjd,KAAKyiD,mBAAmB1wC,EACrFC,EAAGswC,EAAkBtwC,EAAImhD,EAAmBnhD,EAAIhS,KAAKid,MAAQjd,KAAKyiD,mBAAmBzwC,EAGvFhS,MAAKsjD,gBAAgBf,EAAkBxwC,EAAEwwC,EAAkBvwC,GAC3DhS,KAAKozD,kBAGPlwD,EAAQiQ,UAAUm4C,YAAc,WACH,MAAvBtrD,KAAKwiD,iBACPxiD,KAAK6iD,QAAU7iD,KAAKozD,eACpBpzD,KAAKwiD,eAAiB,KACtBxiD,KAAKyiD,mBAAqB,OAS9Bv/C,EAAQiQ,UAAU8/C,kBAAoB,SAAU9Q,GAC9CniD,KAAKmiD,WAAaA,GAAcniD,KAAKmiD,WAAaniD,KAAKiiD,eACvDjiD,KAAKmiD,YAAcniD,KAAKiiD,cAExB,IAAIxwB,GAAW9wB,EAAKqP,gBAAgBhQ,KAAKkiD,yBAAyBliD,KAAKmiD,WAEvEniD,MAAKgd,UAAUhd,KAAKoiD,aAAepiD,KAAKqiD,YAAcriD,KAAKoiD,aAAe3wB,GAC1EzxB,KAAKsjD,gBACHtjD,KAAKsiD,kBAAkBvwC,GAAK/R,KAAKuiD,kBAAkBxwC,EAAI/R,KAAKsiD,kBAAkBvwC,GAAK0f,EACnFzxB,KAAKsiD,kBAAkBtwC,GAAKhS,KAAKuiD,kBAAkBvwC,EAAIhS,KAAKsiD,kBAAkBtwC,GAAKyf,GAGrFzxB,KAAKozD,iBACLpzD,KAAK8kD,QAAS,EAGV9kD,KAAKmiD,YAAc,IACrBniD,KAAKmiD,WAAa,EAEhBniD,KAAK6iD,QADoB,MAAvB7iD,KAAKwiD,eACQxiD,KAAKqzD,cAGLrzD,KAAKozD,eAEtBpzD,KAAK4tB,KAAK,uBAId1qB,EAAQiQ,UAAUigD,eAAiB,aAQnClwD,EAAQiQ,UAAUs2C,SAAW,WAC3B,OAAQzpD,KAAKsoD,WAAatoD,KAAKsoD,UAAUgL,QAQ3CpwD,EAAQiQ,UAAU6vB,SAAW,WAC3B,MAAOhjC,MAAKgd,aAQd9Z,EAAQiQ,UAAUogD,SAAW,WAC3B,MAAOvzD,MAAKyqD,aAQdvnD,EAAQiQ,UAAUqgD,qBAAuB,WACvC,MAAOxzD,MAAKosD,aAAar6C,EAAG,GAAM/R,KAAKsf,MAAMC,OAAOC,YAAaxN,EAAG,GAAMhS,KAAKsf,MAAMC,OAAOsF,gBAI9F3hB,EAAQiQ,UAAUsgD,eAAiB,SAAStN,GAC1C,MAA2B3/C,UAAvBxG,KAAKg9C,MAAMmJ,GACNnmD,KAAKg9C,MAAMmJ,GAAQC,YAD5B,QAKFvmD,EAAOD,QAAUsD,GAKb,SAASrD,EAAQD,EAASM,GAoB9B,QAASkD,GAAMurD,EAAYxrD,EAASuwD,GAClC,IAAKvwD,EACH,KAAM,qBAER,IAAI+K,IAAU,QAAQ,WAClBwzC,EAAY/gD,EAAKsN,sBAAsBC,EAAOwlD,EAClD1zD,MAAKyO,QAAUizC,EAAU5D,MACzB99C,KAAKw+C,QAAUkD,EAAUlD,QACzBx+C,KAAKyO,QAAsB,aAAIilD,EAA+B,aAG9D1zD,KAAKmD,QAAUA,EAGfnD,KAAKK,GAASmG,OACdxG,KAAK2zD,OAASntD,OACdxG,KAAK4zD,KAASptD,OACdxG,KAAKglC,MAASx+B,OACdxG,KAAK6zD,cAAgB7zD,KAAKyO,QAAQ8D,MAAQvS
,KAAKyO,QAAQsvC,yBACvD/9C,KAAKqH,MAASb,OACdxG,KAAKqzC,UAAW,EAChBrzC,KAAKsM,OAAQ,EACbtM,KAAK8zD,iBAAmBnsD,IAAI,EAAEF,KAAK,EAAE8K,MAAM,EAAEC,OAAO,EAAEuhD,MAAM,GAC5D/zD,KAAKg0D,YAAa,EAElBh0D,KAAKopB,KAAO,KACZppB,KAAKqpB,GAAK,KACVrpB,KAAKivD,IAAM,KAEXjvD,KAAKi0D,WAAa,KAClBj0D,KAAKk0D,SAAW,KAIhBl0D,KAAKm0D,kBACLn0D,KAAKo0D,gBAELp0D,KAAK2tD,WAAY,EAEjB3tD,KAAKq0D,YAAc,EACnBr0D,KAAKs0D,aAAc,EAEnBt0D,KAAK0uD,cAAcC,GAEnB3uD,KAAKu0D,qBAAsB,EAC3Bv0D,KAAKw0D,cAAgBprC,KAAK,KAAMC,GAAG,KAAMorC,cACzCz0D,KAAK00D,cAAgB,KAhEvB,GAAI/zD,GAAOT,EAAoB,GAC3BqD,EAAOrD,EAAoB,GAuE/BkD,GAAK+P,UAAUu7C,cAAgB,SAASC,GACtC,GAAKA,EAAL,CAIA,GAAIzgD,IAAU,QAAQ,WAAW,WAAW,YAAY,WAAW,kBAAkB,kBAAkB,QACrG,2BAA2B,aAAa,mBAAmB,OAAO,eAAe,iBAoCnF,QAlCAvN,EAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASkgD,GAEvBnoD,SAApBmoD,EAAWvlC,OAA+BppB,KAAK2zD,OAAShF,EAAWvlC,MACjD5iB,SAAlBmoD,EAAWtlC,KAA+BrpB,KAAK4zD,KAAOjF,EAAWtlC,IAE/C7iB,SAAlBmoD,EAAWtuD,KAA+BL,KAAKK,GAAKsuD,EAAWtuD,IAC1CmG,SAArBmoD,EAAWlmC,QAA+BzoB,KAAKyoB,MAAQkmC,EAAWlmC,MAAOzoB,KAAKg0D,YAAa,GAEtExtD,SAArBmoD,EAAW3pB,QAA6BhlC,KAAKglC,MAAQ2pB,EAAW3pB,OAC3Cx+B,SAArBmoD,EAAWtnD,QAA6BrH,KAAKqH,MAAQsnD,EAAWtnD,OAC1Cb,SAAtBmoD,EAAWhpD,SAA6B3F,KAAKw+C,QAAQK,aAAe8P,EAAWhpD,QAE1Da,SAArBmoD,EAAWxjD,QACbnL,KAAKyO,QAAQ6vC,cAAe,EACxB39C,EAAKwD,SAASwqD,EAAWxjD,QAC3BnL,KAAKyO,QAAQtD,MAAMA,MAAQwjD,EAAWxjD,MACtCnL,KAAKyO,QAAQtD,MAAMkB,UAAYsiD,EAAWxjD,QAGX3E,SAA3BmoD,EAAWxjD,MAAMA,QAA0BnL,KAAKyO,QAAQtD,MAAMA,MAAQwjD,EAAWxjD,MAAMA,OACxD3E,SAA/BmoD,EAAWxjD,MAAMkB,YAA0BrM,KAAKyO,QAAQtD,MAAMkB,UAAYsiD,EAAWxjD,MAAMkB,WAChE7F,SAA3BmoD,EAAWxjD,MAAMmB,QAA0BtM,KAAKyO,QAAQtD,MAAMmB,MAAQqiD,EAAWxjD,MAAMmB,SAK/FtM,KAAK88C,UAEL98C,KAAKq0D,WAAar0D,KAAKq0D,YAAoC7tD,SAArBmoD,EAAWp8C,MACjDvS,KAAKs0D,YAAct0D,KAAKs0D,aAAsC9tD,SAAtBmoD,EAAWhpD,OAEnD3F,KAAK6zD,cAAgB7zD,KAAKyO,QAAQ8D,MAAOvS,KAAKyO,QAAQsvC,yBAG9C/9C,KAAKyO,QAAQxB,OACnB,IAAK,OAAiBjN,KAAK+rC,KAAO/rC,KAAK20D,SAAW,MAClD,KAAK,QAAiB30D,KAAK+rC,KAAO/rC,KAAK40D,UAAY,MACnD,KAAK,eAAiB50D,KAAK+rC,KAAO/rC,KAAK60D,gBAAkB,MACzD,KAAK,YAAiB70D,KAAK+rC,KAAO/rC,KAAK80D,aAAe,MACtD,SAAsB90D,KAAK+rC,KAAO/rC,KAAK20D,aAQ3CvxD,EAAK+P,UAAU2pC,QAAU,WACvB98C,KAAK8uD,aAEL9uD,KAAKopB,KAAOppB,KAAKmD,QAAQ65C,MAAMh9C,KAAK2zD,SAAW,KAC/C3zD,KAAKqpB,GAAKrpB,KAAKmD,QAAQ65C,MAAMh9C,KAAK4zD,OAAS,KAC3C5zD,KAAK2tD,UAAa3tD,KAAKopB,MAAQppB,KAAKqpB,GAEhCrpB,KAAK2tD,WACP3tD,KAAKopB,KAAK2rC,WAAW/0D,MACrBA,KAAKqpB,GAAG0rC,WAAW/0D,QAGfA,KAAKopB,MACPppB,KAAKopB,KAAK4rC,WAAWh1D,MAEnBA,KAAKqpB,IACPrpB,KAAKqpB,GAAG2rC,WAAWh1D,QAQzBoD,EAAK+P,UAAU27C,WAAa,WACtB9uD,KAAKopB,OACPppB,KAAKopB,KAAK4rC,WAAWh1D,MACrBA,KAAKopB,KAAO,MAEVppB,KAAKqpB,KACPrpB,KAAKqpB,GAAG2rC,WAAWh1D,MACnBA,KAAKqpB,GAAK,MAGZrpB,KAAK2tD,WAAY,GAQnBvqD,EAAK+P,UAAUq6C,SAAW,WACxB,MAA6B,kBAAfxtD,MAAKglC,MAAuBhlC,KAAKglC,QAAUhlC,KAAKglC;EAQhE5hC,EAAK+P,UAAUyB,SAAW,WACxB,MAAO5U,MAAKqH,OASdjE,EAAK+P,UAAUi8C,cAAgB,SAAStjD,EAAKY,GAC3C,IAAK1M,KAAKq0D,YAA6B7tD,SAAfxG,KAAKqH,MAAqB,CAChD,GAAI4V,IAASjd,KAAKyO,QAAQ0Y,SAAWnnB,KAAKyO,QAAQyY,WAAaxa,EAAMZ,EACrE9L,MAAKyO,QAAQ8D,OAAQvS,KAAKqH,MAAQyE,GAAOmR,EAAQjd,KAAKyO,QAAQyY,SAC9DlnB,KAAK6zD,cAAgB7zD,KAAKyO,QAAQ8D,MAAOvS,KAAKyO,QAAQsvC,2BAU1D36C,EAAK+P,UAAU44B,KAAO,WACpB,KAAM,uCAQR3oC,EAAK+P,UAAUo6C,kBAAoB,SAASxqC,GAC1C,GAAI/iB,KAAK2tD,UAAW,CAClB,GAAIv+B,GAAU,GACV6lC,EAAQj1D,KAAKopB,KAAKrX,EAClBmjD,EAAQl1D,KAAKopB,KAAKpX,EAClBmjD,EAAMn1D,KAAKqpB,GAAGtX,EACdqjD,EAAMp1D,KAAKqpB,GAAGrX,EACdqjD,EAAOtyC,EAAItb,KACX6tD,EAAOvyC,EAAIpb,IAEXujB,EAAOlrB,KAAKu1D,mBAAmBN,EAAOC,EAAOC,EAAKC,EAAKC,EAAMC,EAEjE,OAAelmC,GAAPlE,EAGR,OAAO,GAIX9nB,EAAK+P,UAAUqiD,UAAY,WACzB,GAAIC,GAAWz1D,KAAKyO,QAAQtD,KAgB5B,OAfiC,MAA7BnL,KAAKyO,QAAQ6vC,aACfm
X,GACEppD,UAAWrM,KAAKqpB,GAAG5a,QAAQtD,MAAMkB,UAAUD,OAC3CE,MAAOtM,KAAKqpB,GAAG5a,QAAQtD,MAAMmB,MAAMF,OACnCjB,MAAOnL,KAAKqpB,GAAG5a,QAAQtD,MAAMiB,SAGK,QAA7BpM,KAAKyO,QAAQ6vC,cAAuD,GAA7Bt+C,KAAKyO,QAAQ6vC,gBAC3DmX,GACEppD,UAAWrM,KAAKopB,KAAK3a,QAAQtD,MAAMkB,UAAUD,OAC7CE,MAAOtM,KAAKopB,KAAK3a,QAAQtD,MAAMmB,MAAMF,OACrCjB,MAAOnL,KAAKopB,KAAK3a,QAAQtD,MAAMiB,SAId,GAAjBpM,KAAKqzC,SAA4BoiB,EAASppD,UACvB,GAAdrM,KAAKsM,MAAuBmpD,EAASnpD,MACTmpD,EAAStqD,OAWhD/H,EAAK+P,UAAUwhD,UAAY,SAAS5tC,GAKlC,GAHAA,EAAIY,YAAc3nB,KAAKw1D,YACvBzuC,EAAIO,UAActnB,KAAK01D,gBAEnB11D,KAAKopB,MAAQppB,KAAKqpB,GAAI,CAExB,GAGInX,GAHA+8C,EAAMjvD,KAAK21D,MAAM5uC,EAIrB,IAAI/mB,KAAKyoB,MAAO,CACd,GAAyC,GAArCzoB,KAAKyO,QAAQqyC,aAAapyC,SAA0B,MAAPugD,EAAa,CAC5D,GAAI2G,GAAY,IAAK,IAAK51D,KAAKopB,KAAKrX,EAAIk9C,EAAIl9C,GAAK,IAAK/R,KAAKqpB,GAAGtX,EAAIk9C,EAAIl9C,IAClE8jD,EAAY,IAAK,IAAK71D,KAAKopB,KAAKpX,EAAIi9C,EAAIj9C,GAAK,IAAKhS,KAAKqpB,GAAGrX,EAAIi9C,EAAIj9C,GACtEE,IAASH,EAAE6jD,EAAW5jD,EAAE6jD,OAGxB3jD,GAAQlS,KAAK81D,aAAa,GAE5B91D,MAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,QAG3C,CACH,GAAID,GAAGC,EACHyZ,EAASzrB,KAAKw+C,QAAQK,aAAe,EACrCiH,EAAO9lD,KAAKopB,IACX08B,GAAKvzC,OACRuzC,EAAKkQ,OAAOjvC,GAEV++B,EAAKvzC,MAAQuzC,EAAKtzC,QACpBT,EAAI+zC,EAAK/zC,EAAI+zC,EAAKvzC,MAAQ,EAC1BP,EAAI8zC,EAAK9zC,EAAIyZ,IAGb1Z,EAAI+zC,EAAK/zC,EAAI0Z,EACbzZ,EAAI8zC,EAAK9zC,EAAI8zC,EAAKtzC,OAAS,GAE7BxS,KAAKi2D,QAAQlvC,EAAKhV,EAAGC,EAAGyZ,GACxBvZ,EAAQlS,KAAKk2D,eAAenkD,EAAGC,EAAGyZ,EAAQ,IAC1CzrB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,KAUhD5O,EAAK+P,UAAUuiD,cAAgB,WAC7B,MAAqB,IAAjB11D,KAAKqzC,SACCnuC,KAAKwH,IAAIxH,KAAK4G,IAAI9L,KAAK6zD,cAAe7zD,KAAKyO,QAAQ0Y,UAAW,GAAInnB,KAAKm2D,iBAG7D,GAAdn2D,KAAKsM,MACApH,KAAKwH,IAAIxH,KAAK4G,IAAI9L,KAAKyO,QAAQuvC,WAAYh+C,KAAKyO,QAAQ0Y,UAAW,GAAInnB,KAAKm2D,iBAG5EjxD,KAAKwH,IAAI1M,KAAKyO,QAAQ8D,MAAO,GAAIvS,KAAKm2D,kBAKnD/yD,EAAK+P,UAAUijD,mBAAqB,WAClC,GAAyC,GAArCp2D,KAAKyO,QAAQqyC,aAAaC,SAAwD,GAArC/gD,KAAKyO,QAAQqyC,aAAapyC,QACzE,MAAO1O,MAAKivD,GAET,IAAyC,GAArCjvD,KAAKyO,QAAQqyC,aAAapyC,QACjC,OAAQqD,EAAE,EAAEC,EAAE,EAGd,IAAIqkD,GAAO,KACPC,EAAO,KACP3P,EAAS3mD,KAAKyO,QAAQqyC,aAAaE,UACnCl6C,EAAO9G,KAAKyO,QAAQqyC,aAAah6C,KAEjC8X,EAAK1Z,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACpC8M,EAAK3Z,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EA2JxC,OA1JY,YAARlL,GAA8B,iBAARA,EACpB5B,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACjEhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACpBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACxBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,GAEvB7e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAC7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,GAGzB7e,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACzBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACxBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,GAEvB7e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAC7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,IAGtB,YAAR/X,IACFuvD,EAAY1P,EAAS9nC,EAAdD,EAAmB5e,KAAKopB,KAAKrX,EAAIskD,IAGnCnxD,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,KACtEhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACpBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACxBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,GAEvB5e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAC7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,E
AAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,GAGzB5e,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACzBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACxBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,GAEvB5e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAC7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,IAGtB,YAAR9X,IACFwvD,EAAY3P,EAAS/nC,EAAdC,EAAmB7e,KAAKopB,KAAKpX,EAAIskD,IAI7B,iBAARxvD,EACH5B,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACrEqkD,EAAOr2D,KAAKopB,KAAKrX,EAEfukD,EADEt2D,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACjBhS,KAAKqpB,GAAGrX,GAAK,EAAI20C,GAAU9nC,EAG3B7e,KAAKqpB,GAAGrX,GAAK,EAAI20C,GAAU9nC,GAG7B3Z,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,KAExEqkD,EADEr2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,EACjB/R,KAAKqpB,GAAGtX,GAAK,EAAI40C,GAAU/nC,EAG3B5e,KAAKqpB,GAAGtX,GAAK,EAAI40C,GAAU/nC,EAEpC03C,EAAOt2D,KAAKopB,KAAKpX,GAGJ,cAARlL,GAELuvD,EADEr2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,EACjB/R,KAAKqpB,GAAGtX,GAAK,EAAI40C,GAAU/nC,EAG3B5e,KAAKqpB,GAAGtX,GAAK,EAAI40C,GAAU/nC,EAEpC03C,EAAOt2D,KAAKopB,KAAKpX,GAEF,YAARlL,GACPuvD,EAAOr2D,KAAKopB,KAAKrX,EAEfukD,EADEt2D,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACjBhS,KAAKqpB,GAAGrX,GAAK,EAAI20C,GAAU9nC,EAG3B7e,KAAKqpB,GAAGrX,GAAK,EAAI20C,GAAU9nC,GAIhC3Z,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,GACjEhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACpBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAExBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,EAC9Bw3C,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,GAE/Br2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAE7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,EAC9Bw3C,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,GAGjCr2D,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACzBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAExBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,EAC9Bw3C,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,GAE/Br2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAE7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,EAC9Bw3C,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,IAInCnxD,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,KACtEhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACpBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAExBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,GAE/Bt2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAE7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,GAGjCt2D,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACzBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAExBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,GAE/Bt2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAE7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,MAO
tCvkD,EAAGskD,EAAMrkD,EAAGskD,IASxBlzD,EAAK+P,UAAUwiD,MAAQ,SAAU5uC,GAI/B,GAFAA,EAAIa,YACJb,EAAIc,OAAO7nB,KAAKopB,KAAKrX,EAAG/R,KAAKopB,KAAKpX,GACO,GAArChS,KAAKyO,QAAQqyC,aAAapyC,QAAiB,CAC7C,GAAyC,GAArC1O,KAAKyO,QAAQqyC,aAAaC,QAAkB,CAC9C,GAAIkO,GAAMjvD,KAAKo2D,oBACf,OAAa,OAATnH,EAAIl9C,GACNgV,EAAIe,OAAO9nB,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,GAC9B+U,EAAIlH,SACG,OAKPkH,EAAIwvC,iBAAiBtH,EAAIl9C,EAAEk9C,EAAIj9C,EAAEhS,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,GACpD+U,EAAIlH,SACGovC,GAMT,MAFAloC,GAAIwvC,iBAAiBv2D,KAAKivD,IAAIl9C,EAAE/R,KAAKivD,IAAIj9C,EAAEhS,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,GAC9D+U,EAAIlH,SACG7f,KAAKivD,IAMd,MAFAloC,GAAIe,OAAO9nB,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,GAC9B+U,EAAIlH,SACG,MAYXzc,EAAK+P,UAAU8iD,QAAU,SAAUlvC,EAAKhV,EAAGC,EAAGyZ,GAE5C1E,EAAIa,YACJb,EAAI2E,IAAI3Z,EAAGC,EAAGyZ,EAAQ,EAAG,EAAIvmB,KAAKymB,IAAI,GACtC5E,EAAIlH,UAWNzc,EAAK+P,UAAU4iD,OAAS,SAAUhvC,EAAKwC,EAAMxX,EAAGC,GAC9C,GAAIuX,EAAM,CACRxC,EAAIQ,MAASvnB,KAAKopB,KAAKiqB,UAAYrzC,KAAKqpB,GAAGgqB,SAAY,QAAU,IACjErzC,KAAKyO,QAAQ8uC,SAAW,MAAQv9C,KAAKyO,QAAQ+uC,QAC7C,IAAIuW,EAEJ,IAAuB,GAAnB/zD,KAAKg0D,WAAoB,CAC3B,GAAIxtB,GAAQpiC,OAAOmlB,GAAMxhB,MAAM,MAC3ByuD,EAAYhwB,EAAM7gC,OAClB43C,EAAWr5C,OAAOlE,KAAKyO,QAAQ8uC,SACnCwW,GAAQ/hD,GAAK,EAAIwkD,GAAa,EAAIjZ,CAGlC,KAAK,GADDhrC,GAAQwU,EAAI0vC,YAAYjwB,EAAM,IAAIj0B,MAC7B/M,EAAI,EAAOgxD,EAAJhxD,EAAeA,IAAK,CAClC,GAAI8hB,GAAYP,EAAI0vC,YAAYjwB,EAAMhhC,IAAI+M,KAC1CA,GAAQ+U,EAAY/U,EAAQ+U,EAAY/U,EAE1C,GAAIC,GAASxS,KAAKyO,QAAQ8uC,SAAWiZ,EACjC/uD,EAAOsK,EAAIQ,EAAQ,EACnB5K,EAAMqK,EAAIQ,EAAS,CAGvBxS,MAAK8zD,iBAAmBnsD,IAAIA,EAAIF,KAAKA,EAAK8K,MAAMA,EAAMC,OAAOA,EAAOuhD,MAAMA,GAG/E,GAAIA,GAAQ/zD,KAAK8zD,gBAAgBC,KAEjChtC,GAAIuoC,OAE+B,cAA/BtvD,KAAKyO,QAAQwvC,iBAChBl3B,EAAIwoC,UAAUx9C,EAAGgiD,GACjB/zD,KAAK02D,yBAAyB3vC,GAC9BhV,EAAI,EACJgiD,EAAQ,GAIT/zD,KAAK22D,eAAe5vC,GACpB/mB,KAAK42D,eAAe7vC,EAAIhV,EAAEgiD,EAAOvtB,EAAOgwB,EAAWjZ,GAEnDx2B,EAAI0oC,YASLrsD,EAAK+P,UAAUujD,yBAA2B,SAAS3vC,GAClD,GAAIlI,GAAK7e,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EAC3B4M,EAAK5e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,EAC3B8kD,EAAiB3xD,KAAK4xD,MAAMj4C,EAAID,IAGf,GAAjBi4C,GAA4B,EAALj4C,GAAYi4C,EAAiB,GAAU,EAALj4C,KAC5Di4C,GAAkC3xD,KAAKymB,IAGxC5E,EAAIgwC,OAAOF,IASZzzD,EAAK+P,UAAUwjD,eAAiB,SAAS5vC,GACxC,GAA8BvgB,SAA1BxG,KAAKyO,QAAQgvC,UAAoD,OAA1Bz9C,KAAKyO,QAAQgvC,UAA+C,SAA1Bz9C,KAAKyO,QAAQgvC,SAAqB,CAC9G12B,EAAIiB,UAAYhoB,KAAKyO,QAAQgvC,QAE7B,IAAIuZ,GAAa,CAEoB,gBAA/Bh3D,KAAKyO,QAAQwvC,eACfl3B,EAAIkwC,SAAuC,IAA7Bj3D,KAAK8zD,gBAAgBvhD,MAA4C,IAA9BvS,KAAK8zD,gBAAgBthD,OAAcxS,KAAK8zD,gBAAgBvhD,MAAOvS,KAAK8zD,gBAAgBthD,QAE/F,cAA/BxS,KAAKyO,QAAQwvC,eACpBl3B,EAAIkwC,SAAuC,IAA7Bj3D,KAAK8zD,gBAAgBvhD,QAAevS,KAAK8zD,gBAAgBthD,OAASwkD,GAAah3D,KAAK8zD,gBAAgBvhD,MAAOvS,KAAK8zD,gBAAgBthD,QAExG,cAA/BxS,KAAKyO,QAAQwvC,eACpBl3B,EAAIkwC,SAAuC,IAA7Bj3D,KAAK8zD,gBAAgBvhD,MAAaykD,EAAYh3D,KAAK8zD,gBAAgBvhD,MAAOvS,KAAK8zD,gBAAgBthD,QAG7GuU,EAAIkwC,SAASj3D,KAAK8zD,gBAAgBrsD,KAAMzH,KAAK8zD,gBAAgBnsD,IAAK3H,KAAK8zD,gBAAgBvhD,MAAOvS,KAAK8zD,gBAAgBthD,UAezHpP,EAAK+P,UAAUyjD,eAAiB,SAAS7vC,EAAKhV,EAAGgiD,EAAOvtB,EAAOgwB,EAAWjZ,GAMxE,GAJDx2B,EAAIiB,UAAYhoB,KAAKyO,QAAQ6uC,WAAa,QAC1Cv2B,EAAIuB,UAAY,SAGoB,cAA/BtoB,KAAKyO,QAAQwvC,eAAgC,CAC/C,GAAI+Y,GAAa,CACkB,eAA/Bh3D,KAAKyO,QAAQwvC,gBACfl3B,EAAIwB,aAAe,aACnBwrC,GAAS,EAAIiD,GAEyB,cAA/Bh3D,KAAKyO,QAAQwvC,gBACpBl3B,EAAIwB,aAAe,UACnBwrC,GAAS,EAAIiD,GAGbjwC,EAAIwB,aAAe,aAIrBxB,GAAIwB,aAAe,QAIjBvoB,MAAKyO,QAAQivC,gBAAkB,IACjC32B,EAAIO,UAActnB,KAAKyO,QAAQivC,gBAC/B32B,EAAIY,YAAc3nB,KAAKyO,QAAQkvC,gBAC/B52B,EAAImwC,SAAc,QAErB,KAAK,GAAI1xD,GAAI,EAAOgxD,EAAJhxD,EAAeA,IACzBxF,KAAKyO,QAAQivC,gBAAkB,GAChC32B,EAAIowC,W
AAW3wB,EAAMhhC,GAAIuM,EAAGgiD,GAEhChtC,EAAIyB,SAASge,EAAMhhC,GAAIuM,EAAGgiD,GAC1BA,GAASxW,GAaXn6C,EAAK+P,UAAU2hD,cAAgB,SAAS/tC,GAEtCA,EAAIY,YAAc3nB,KAAKw1D,YACvBzuC,EAAIO,UAAYtnB,KAAK01D,eAErB,IAAIzG,GAAM,IAEV,IAAwBzoD,SAApBugB,EAAIqwC,YAA2B,CACjCrwC,EAAIuoC,MAEJ,IAAI+H,IAAW,EAEbA,GAD+B7wD,SAA7BxG,KAAKyO,QAAQ0vC,KAAKx4C,QAAkDa,SAA1BxG,KAAKyO,QAAQ0vC,KAAKC,KACnDp+C,KAAKyO,QAAQ0vC,KAAKx4C,OAAO3F,KAAKyO,QAAQ0vC,KAAKC,MAG3C,EAAE,GAIfr3B,EAAIqwC,YAAYC,GAChBtwC,EAAIuwC,eAAiB,EAGrBrI,EAAMjvD,KAAK21D,MAAM5uC,GAGjBA,EAAIqwC,aAAa,IACjBrwC,EAAIuwC,eAAiB,EACrBvwC,EAAI0oC,cAIJ1oC,GAAIa,YACJb,EAAIwwC,QAAU,QACsB/wD,SAAhCxG,KAAKyO,QAAQ0vC,KAAKE,UAEpBt3B,EAAIywC,WAAWx3D,KAAKopB,KAAKrX,EAAE/R,KAAKopB,KAAKpX,EAAEhS,KAAKqpB,GAAGtX,EAAE/R,KAAKqpB,GAAGrX,GACpDhS,KAAKyO,QAAQ0vC,KAAKx4C,OAAO3F,KAAKyO,QAAQ0vC,KAAKC,IAAIp+C,KAAKyO,QAAQ0vC,KAAKE,UAAUr+C,KAAKyO,QAAQ0vC,KAAKC,MAE9D53C,SAA7BxG,KAAKyO,QAAQ0vC,KAAKx4C,QAAkDa,SAA1BxG,KAAKyO,QAAQ0vC,KAAKC,IAEnEr3B,EAAIywC,WAAWx3D,KAAKopB,KAAKrX,EAAE/R,KAAKopB,KAAKpX,EAAEhS,KAAKqpB,GAAGtX,EAAE/R,KAAKqpB,GAAGrX,GACpDhS,KAAKyO,QAAQ0vC,KAAKx4C,OAAO3F,KAAKyO,QAAQ0vC,KAAKC,OAIhDr3B,EAAIc,OAAO7nB,KAAKopB,KAAKrX,EAAG/R,KAAKopB,KAAKpX,GAClC+U,EAAIe,OAAO9nB,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,IAEhC+U,EAAIlH,QAIN,IAAI7f,KAAKyoB,MAAO,CACd,GAAIvW,EACJ,IAAyC,GAArClS,KAAKyO,QAAQqyC,aAAapyC,SAA0B,MAAPugD,EAAa,CAC5D,GAAI2G,GAAY,IAAK,IAAK51D,KAAKopB,KAAKrX,EAAIk9C,EAAIl9C,GAAK,IAAK/R,KAAKqpB,GAAGtX,EAAIk9C,EAAIl9C,IAClE8jD,EAAY,IAAK,IAAK71D,KAAKopB,KAAKpX,EAAIi9C,EAAIj9C,GAAK,IAAKhS,KAAKqpB,GAAGrX,EAAIi9C,EAAIj9C,GACtEE,IAASH,EAAE6jD,EAAW5jD,EAAE6jD,OAGxB3jD,GAAQlS,KAAK81D,aAAa,GAE5B91D,MAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,KAUhD5O,EAAK+P,UAAU2iD,aAAe,SAAU2B,GACtC,OACE1lD,GAAI,EAAI0lD,GAAcz3D,KAAKopB,KAAKrX,EAAI0lD,EAAaz3D,KAAKqpB,GAAGtX,EACzDC,GAAI,EAAIylD,GAAcz3D,KAAKopB,KAAKpX,EAAIylD,EAAaz3D,KAAKqpB,GAAGrX,IAa7D5O,EAAK+P,UAAU+iD,eAAiB,SAAUnkD,EAAGC,EAAGyZ,EAAQgsC,GACtD,GAAIrJ,GAA6B,GAApBqJ,EAAa,EAAE,GAASvyD,KAAKymB,EAC1C,QACE5Z,EAAGA,EAAI0Z,EAASvmB,KAAKqZ,IAAI6vC,GACzBp8C,EAAGA,EAAIyZ,EAASvmB,KAAKkZ,IAAIgwC,KAW7BhrD,EAAK+P,UAAU0hD,iBAAmB,SAAS9tC,GACzC,GAAI7U,EAMJ,IAJA6U,EAAIY,YAAc3nB,KAAKw1D,YACvBzuC,EAAIiB,UAAYjB,EAAIY,YACpBZ,EAAIO,UAAYtnB,KAAK01D,gBAEjB11D,KAAKopB,MAAQppB,KAAKqpB,GAAI,CAExB,GAAI4lC,GAAMjvD,KAAK21D,MAAM5uC,GAEjBqnC,EAAQlpD,KAAK4xD,MAAO92D,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAAKhS,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,GACrEpM,GAAU,GAAK,EAAI3F,KAAKyO,QAAQ8D,OAASvS,KAAKyO,QAAQyvC,gBAE1D,IAAyC,GAArCl+C,KAAKyO,QAAQqyC,aAAapyC,SAA0B,MAAPugD,EAAa,CAC5D,GAAI2G,GAAY,IAAK,IAAK51D,KAAKopB,KAAKrX,EAAIk9C,EAAIl9C,GAAK,IAAK/R,KAAKqpB,GAAGtX,EAAIk9C,EAAIl9C,IAClE8jD,EAAY,IAAK,IAAK71D,KAAKopB,KAAKpX,EAAIi9C,EAAIj9C,GAAK,IAAKhS,KAAKqpB,GAAGrX,EAAIi9C,EAAIj9C,GACtEE,IAASH,EAAE6jD,EAAW5jD,EAAE6jD,OAGxB3jD,GAAQlS,KAAK81D,aAAa,GAG5B/uC,GAAI2wC,MAAMxlD,EAAMH,EAAGG,EAAMF,EAAGo8C,EAAOzoD,GACnCohB,EAAInH,OACJmH,EAAIlH,SAGA7f,KAAKyoB,OACPzoB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,OAG3C,CAEH,GAAID,GAAGC,EACHyZ,EAAS,IAAOvmB,KAAKwH,IAAI,IAAI1M,KAAKw+C,QAAQK,cAC1CiH,EAAO9lD,KAAKopB,IACX08B,GAAKvzC,OACRuzC,EAAKkQ,OAAOjvC,GAEV++B,EAAKvzC,MAAQuzC,EAAKtzC,QACpBT,EAAI+zC,EAAK/zC,EAAiB,GAAb+zC,EAAKvzC,MAClBP,EAAI8zC,EAAK9zC,EAAIyZ,IAGb1Z,EAAI+zC,EAAK/zC,EAAI0Z,EACbzZ,EAAI8zC,EAAK9zC,EAAkB,GAAd8zC,EAAKtzC,QAEpBxS,KAAKi2D,QAAQlvC,EAAKhV,EAAGC,EAAGyZ,EAGxB,IAAI2iC,GAAQ,GAAMlpD,KAAKymB,GACnBhmB,GAAU,GAAK,EAAI3F,KAAKyO,QAAQ8D,OAASvS,KAAKyO,QAAQyvC,gBAC1DhsC,GAAQlS,KAAKk2D,eAAenkD,EAAGC,EAAGyZ,EAAQ,IAC1C1E,EAAI2wC,MAAMxlD,EAAMH,EAAGG,EAAMF,EAAGo8C,EAAOzoD,GACnCohB,EAAInH,O
ACJmH,EAAIlH,SAGA7f,KAAKyoB,QACPvW,EAAQlS,KAAKk2D,eAAenkD,EAAGC,EAAGyZ,EAAQ,IAC1CzrB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,MAKlD5O,EAAK+P,UAAUwkD,eAAiB,SAAS7pD,GACvC,GAAImhD,GAAMjvD,KAAKo2D,qBAEXrkD,EAAI7M,KAAK4uB,IAAI,EAAEhmB,EAAE,GAAG9N,KAAKopB,KAAKrX,EAAK,EAAEjE,GAAG,EAAIA,GAAImhD,EAAIl9C,EAAI7M,KAAK4uB,IAAIhmB,EAAE,GAAG9N,KAAKqpB,GAAGtX,EAC9EC,EAAI9M,KAAK4uB,IAAI,EAAEhmB,EAAE,GAAG9N,KAAKopB,KAAKpX,EAAK,EAAElE,GAAG,EAAIA,GAAImhD,EAAIj9C,EAAI9M,KAAK4uB,IAAIhmB,EAAE,GAAG9N,KAAKqpB,GAAGrX,CAElF,QAAQD,EAAEA,EAAEC,EAAEA,IAWhB5O,EAAK+P,UAAUykD,oBAAsB,SAASxuC,EAAKrC,GACjD,GAIIxB,GAAI6oC,EAAMyJ,EAAkBC,EAAiBC,EAJ7C/oD,EAAgB,GAChBC,EAAY,EACZC,EAAM,EACNC,EAAO,EAEP6oD,EAAY,GACZlS,EAAO9lD,KAAKqpB,EAKhB,KAJY,GAARD,IACF08B,EAAO9lD,KAAKopB,MAGAja,GAAPD,GAA2BF,EAAZC,GAA2B,CAC/C,GAAIG,GAAwB,IAAdF,EAAMC,EAOpB,IALAoW,EAAMvlB,KAAK23D,eAAevoD,GAC1Bg/C,EAAQlpD,KAAK4xD,MAAOhR,EAAK9zC,EAAIuT,EAAIvT,EAAK8zC,EAAK/zC,EAAIwT,EAAIxT,GACnD8lD,EAAmB/R,EAAK+R,iBAAiB9wC,EAAIqnC,GAC7C0J,EAAkB5yD,KAAKyqB,KAAKzqB,KAAK4uB,IAAIvO,EAAIxT,EAAE+zC,EAAK/zC,EAAE,GAAK7M,KAAK4uB,IAAIvO,EAAIvT,EAAE8zC,EAAK9zC,EAAE,IAC7E+lD,EAAaF,EAAmBC,EAC5B5yD,KAAK2lB,IAAIktC,GAAcC,EACzB,KAEoB,GAAbD,EACK,GAAR3uC,EACFla,EAAME,EAGND,EAAOC,EAIG,GAARga,EACFja,EAAOC,EAGPF,EAAME,EAIVH,IAIF,MAFAsW,GAAIzX,EAAIsB,EAEDmW,GAUTniB,EAAK+P,UAAUyhD,WAAa,SAAS7tC,GAEnCA,EAAIY,YAAc3nB,KAAKw1D,YACvBzuC,EAAIiB,UAAYjB,EAAIY,YACpBZ,EAAIO,UAAYtnB,KAAK01D,eAGrB,IAAItH,GAAOzoD,EAAQsyD,CAGnB,IAAIj4D,KAAKopB,MAAQppB,KAAKqpB,GAAI,CAKxB,GAHArpB,KAAK21D,MAAM5uC,GAG8B,GAArC/mB,KAAKyO,QAAQqyC,aAAapyC,QAAiB,CAC7C,GAAIugD,GAAMjvD,KAAKo2D,oBACf6B,GAAWj4D,KAAK43D,qBAAoB,EAAO7wC,EAC3C,IAAImxC,GAAWl4D,KAAK23D,eAAezyD,KAAKwH,IAAI,EAAKurD,EAASnqD,EAAI,IAC9DsgD,GAAQlpD,KAAK4xD,MAAOmB,EAASjmD,EAAIkmD,EAASlmD,EAAKimD,EAASlmD,EAAImmD,EAASnmD,OAElE,CACHq8C,EAAQlpD,KAAK4xD,MAAO92D,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAAKhS,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,EACrE,IAAI6M,GAAM5e,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,EAC5B8M,EAAM7e,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAC5BmmD,EAAoBjzD,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAC7Cu5C,EAAep4D,KAAKqpB,GAAGwuC,iBAAiB9wC,EAAKqnC,GAC7CiK,GAAiBF,EAAoBC,GAAgBD,CAEzDF,MACAA,EAASlmD,GAAK,EAAIsmD,GAAiBr4D,KAAKopB,KAAKrX,EAAIsmD,EAAgBr4D,KAAKqpB,GAAGtX,EACzEkmD,EAASjmD,GAAK,EAAIqmD,GAAiBr4D,KAAKopB,KAAKpX,EAAIqmD,EAAgBr4D,KAAKqpB,GAAGrX,EAU3E,GANArM,GAAU,GAAK,EAAI3F,KAAKyO,QAAQ8D,OAASvS,KAAKyO,QAAQyvC,iBACtDn3B,EAAI2wC,MAAMO,EAASlmD,EAAEkmD,EAASjmD,EAAGo8C,EAAOzoD,GACxCohB,EAAInH,OACJmH,EAAIlH,SAGA7f,KAAKyoB,MAAO,CACd,GAAIvW,EAEFA,GADuC,GAArClS,KAAKyO,QAAQqyC,aAAapyC,SAA0B,MAAPugD,EACvCjvD,KAAK23D,eAAe,IAGpB33D,KAAK81D,aAAa,IAE5B91D,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,QAG3C,CAEH,GACID,GAAGC,EAAG0lD,EADN5R,EAAO9lD,KAAKopB,KAEZqC,EAAS,IAAOvmB,KAAKwH,IAAI,IAAI1M,KAAKw+C,QAAQK,aACzCiH,GAAKvzC,OACRuzC,EAAKkQ,OAAOjvC,GAEV++B,EAAKvzC,MAAQuzC,EAAKtzC,QACpBT,EAAI+zC,EAAK/zC,EAAiB,GAAb+zC,EAAKvzC,MAClBP,EAAI8zC,EAAK9zC,EAAIyZ,EACbisC,GACE3lD,EAAGA,EACHC,EAAG8zC,EAAK9zC,EACRo8C,MAAO,GAAMlpD,KAAKymB,MAIpB5Z,EAAI+zC,EAAK/zC,EAAI0Z,EACbzZ,EAAI8zC,EAAK9zC,EAAkB,GAAd8zC,EAAKtzC,OAClBklD,GACE3lD,EAAG+zC,EAAK/zC,EACRC,EAAGA,EACHo8C,MAAO,GAAMlpD,KAAKymB,KAGtB5E,EAAIa,YAEJb,EAAI2E,IAAI3Z,EAAGC,EAAGyZ,EAAQ,EAAG,EAAIvmB,KAAKymB,IAAI,GACtC5E,EAAIlH,QAGJ,IAAIla,IAAU,GAAK,EAAI3F,KAAKyO,QAAQ8D,OAASvS,KAAKyO,QAAQyvC,gBAC1Dn3B,GAAI2wC,MAAMA,EAAM3lD,EAAG2lD,EAAM1lD,EAAG0lD,EAAMtJ,MAAOzoD,GACzCohB,EAAInH,OACJmH,EAAIlH,SAGA7f,KAAKyoB,QACPvW,EAAQlS,KAAKk2D,eAAenkD,EAAGC,EAAGyZ,EAAQ,IAC1CzrB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,MAiBlD5O
,EAAK+P,UAAUoiD,mBAAqB,SAAU+C,EAAGC,EAAIC,EAAGC,EAAIC,EAAGC,GAC7D,GAAInvD,GAAc,CAClB,IAAIxJ,KAAKopB,MAAQppB,KAAKqpB,GACpB,GAAyC,GAArCrpB,KAAKyO,QAAQqyC,aAAapyC,QAAiB,CAC7C,GAAI2nD,GAAMC,CACV,IAAyC,GAArCt2D,KAAKyO,QAAQqyC,aAAapyC,SAAwD,GAArC1O,KAAKyO,QAAQqyC,aAAaC,QACzEsV,EAAOr2D,KAAKivD,IAAIl9C,EAChBukD,EAAOt2D,KAAKivD,IAAIj9C,MAEb,CACH,GAAIi9C,GAAMjvD,KAAKo2D,oBACfC,GAAOpH,EAAIl9C,EACXukD,EAAOrH,EAAIj9C,EAEb,GACI2T,GACAngB,EAAEsI,EAAEiE,EAAEC,EAAG4mD,EAAOC,EAFhBC,EAAc,GAGlB,KAAKtzD,EAAI,EAAO,GAAJA,EAAQA,IAClBsI,EAAI,GAAItI,EACRuM,EAAI7M,KAAK4uB,IAAI,EAAEhmB,EAAE,GAAGwqD,EAAM,EAAExqD,GAAG,EAAIA,GAAIuoD,EAAOnxD,KAAK4uB,IAAIhmB,EAAE,GAAG0qD,EAC5DxmD,EAAI9M,KAAK4uB,IAAI,EAAEhmB,EAAE,GAAGyqD,EAAM,EAAEzqD,GAAG,EAAIA,GAAIwoD,EAAOpxD,KAAK4uB,IAAIhmB,EAAE,GAAG2qD,EACxDjzD,EAAI,IACNmgB,EAAW3lB,KAAK+4D,mBAAmBH,EAAMC,EAAM9mD,EAAEC,EAAG0mD,EAAGC,GACvDG,EAAyBA,EAAXnzC,EAAyBA,EAAWmzC,GAEpDF,EAAQ7mD,EAAG8mD,EAAQ7mD,CAErBxI,GAAcsvD,MAGdtvD,GAAcxJ,KAAK+4D,mBAAmBT,EAAGC,EAAGC,EAAGC,EAAGC,EAAGC,OAGpD,CACH,GAAI5mD,GAAGC,EAAG4M,EAAIC,EACV4M,EAAS,IAAOzrB,KAAKw+C,QAAQK,aAC7BiH,EAAO9lD,KAAKopB,IACZ08B,GAAKvzC,MAAQuzC,EAAKtzC,QACpBT,EAAI+zC,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,MACxBP,EAAI8zC,EAAK9zC,EAAIyZ,IAGb1Z,EAAI+zC,EAAK/zC,EAAI0Z,EACbzZ,EAAI8zC,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,QAE1BoM,EAAK7M,EAAI2mD,EACT75C,EAAK7M,EAAI2mD,EACTnvD,EAActE,KAAK2lB,IAAI3lB,KAAKyqB,KAAK/Q,EAAGA,EAAKC,EAAGA,GAAM4M,GAGpD,MAAIzrB,MAAK8zD,gBAAgBrsD,KAAOixD,GAC9B14D,KAAK8zD,gBAAgBrsD,KAAOzH,KAAK8zD,gBAAgBvhD,MAAQmmD,GACzD14D,KAAK8zD,gBAAgBnsD,IAAMgxD,GAC3B34D,KAAK8zD,gBAAgBnsD,IAAM3H,KAAK8zD,gBAAgBthD,OAASmmD,EAClD,EAGAnvD,GAIXpG,EAAK+P,UAAU4lD,mBAAqB,SAAST,EAAGC,EAAGC,EAAGC,EAAGC,EAAGC,GAC1D,GAAIK,GAAKR,EAAGF,EACVW,EAAKR,EAAGF,EACRW,EAAYF,EAAGA,EAAKC,EAAGA,EACvBE,IAAOT,EAAKJ,GAAMU,GAAML,EAAKJ,GAAMU,GAAMC,CAEvCC,GAAI,EACNA,EAAI,EAEO,EAAJA,IACPA,EAAI,EAGN,IAAIpnD,GAAIumD,EAAKa,EAAIH,EACfhnD,EAAIumD,EAAKY,EAAIF,EACbr6C,EAAK7M,EAAI2mD,EACT75C,EAAK7M,EAAI2mD,CAQX,OAAOzzD,MAAKyqB,KAAK/Q,EAAGA,EAAKC,EAAGA,IAQ9Bzb,EAAK+P,UAAU6vB,SAAW,SAAS/lB,GACjCjd,KAAKm2D,gBAAkB,EAAIl5C,GAI7B7Z,EAAK+P,UAAUg+B,OAAS,WACtBnxC,KAAKqzC,UAAW,GAGlBjwC,EAAK+P,UAAU+9B,SAAW,WACxBlxC,KAAKqzC,UAAW,GAGlBjwC,EAAK+P,UAAUg/C,mBAAqB,WACjB,OAAbnyD,KAAKivD,KAA8B,OAAdjvD,KAAKopB,MAA6B,OAAZppB,KAAKqpB,IAClDrpB,KAAKivD,IAAIl9C,EAAI,IAAO/R,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAC1C/R,KAAKivD,IAAIj9C,EAAI,IAAOhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,KAG1ChS,KAAKivD,IAAIl9C,EAAI,EACb/R,KAAKivD,IAAIj9C,EAAI,IASjB5O,EAAK+P,UAAU+8C,kBAAoB,SAASnpC,GAC1C,GAAgC,GAA5B/mB,KAAKu0D,oBAA6B,CACpC,GAA+B,OAA3Bv0D,KAAKw0D,aAAaprC,MAA0C,OAAzBppB,KAAKw0D,aAAanrC,GAAa,CACpE,GAAI+vC,GAAa,cAAcplD,OAAOhU,KAAKK,IACvCg5D,EAAW,YAAYrlD,OAAOhU,KAAKK,IACnCqhD,GACY1E,OAAO/qC,MAAM,GAAIwZ,OAAO,EAAGzL,YAAY,EAAG69B,oBAAqB,GAC/DW,SAASO,QAAQ,GACjBI,YAAac,sBAAuB,EAAGD,aAAcztC,MAAM,EAAGC,OAAQ,EAAGiZ,OAAO,IAEhGzrB,MAAKw0D,aAAaprC,KAAO,GAAI7lB,IAC1BlD,GAAG+4D,EACFhc,MAAM,MACJjyC,OAAOgB,WAAW,UAAWC,OAAO,UAAWC,WAAYF,WAAW,mBAClEu1C,GACV1hD,KAAKw0D,aAAanrC,GAAK,GAAI9lB,IACxBlD,GAAGg5D,EACFjc,MAAM,MACNjyC,OAAOgB,WAAW,UAAWC,OAAO,UAAWC,WAAYF,WAAW,mBAChEu1C,GAGZ1hD,KAAKw0D,aAAaC,aACqB,GAAnCz0D,KAAKw0D,aAAaprC,KAAKiqB,WACzBrzC,KAAKw0D,aAAaC,UAAUrrC,KAAOppB,KAAKs5D,2BAA2BvyC,GACnE/mB,KAAKw0D,aAAaprC,KAAKrX,EAAI/R,KAAKw0D,aAAaC,UAAUrrC,KAAKrX,EAC5D/R,KAAKw0D,aAAaprC,KAAKpX,EAAIhS,KAAKw0D,aAAaC,UAAUrrC,KAAKpX,GAEzB,GAAjChS,KAAKw0D,aAAanrC,GAAGgqB,WACvBrzC,KAAKw0D,aAAaC,UAAUprC,GAAKrpB,KAAKu5D,yBAAyBxyC,GAC/D/mB,KAAKw0D,aAAanrC,GAAGtX,EAAI/R,KAAKw0D,aAAaC,UAAUprC,GAAGtX,EACxD/R,KAAKw0D,aAAanrC,GAAGrX,EAAIhS,KAAKw0D,aAAaC,UAAUprC,GAAGrX,GAG1DhS,K
AAKw0D,aAAaprC,KAAK2iB,KAAKhlB,GAC5B/mB,KAAKw0D,aAAanrC,GAAG0iB,KAAKhlB,OAG1B/mB,MAAKw0D,cAAgBprC,KAAK,KAAMC,GAAG,KAAMorC,eAQ7CrxD,EAAK+P,UAAUqmD,oBAAsB,WACnCx5D,KAAKi0D,WAAaj0D,KAAKopB,KACvBppB,KAAKk0D,SAAWl0D,KAAKqpB,GACrBrpB,KAAKu0D,qBAAsB,GAO7BnxD,EAAK+P,UAAUsmD,qBAAuB,WACpCz5D,KAAK2zD,OAAS3zD,KAAKopB,KAAK/oB,GACxBL,KAAK4zD,KAAO5zD,KAAKqpB,GAAGhpB,GAChBL,KAAK2zD,QAAU3zD,KAAKi0D,WAAW5zD,GACjCL,KAAKi0D,WAAWe,WAAWh1D,MAEpBA,KAAK4zD,MAAQ5zD,KAAKk0D,SAAS7zD,IAClCL,KAAKk0D,SAASc,WAAWh1D,MAG3BA,KAAKi0D,WAAa,KAClBj0D,KAAKk0D,SAAW,KAChBl0D,KAAKu0D,qBAAsB,GAW7BnxD,EAAK+P,UAAUumD,wBAA0B,SAAS3nD,EAAEC,GAClD,GAAIyiD,GAAYz0D,KAAKw0D,aAAaC,UAC9BkF,EAAez0D,KAAKyqB,KAAKzqB,KAAK4uB,IAAI/hB,EAAI0iD,EAAUrrC,KAAKrX,EAAE,GAAK7M,KAAK4uB,IAAI9hB,EAAIyiD,EAAUrrC,KAAKpX,EAAE,IAC1F4nD,EAAe10D,KAAKyqB,KAAKzqB,KAAK4uB,IAAI/hB,EAAI0iD,EAAUprC,GAAGtX,EAAI,GAAK7M,KAAK4uB,IAAI9hB,EAAIyiD,EAAUprC,GAAGrX,EAAI,GAE9F,OAAmB,IAAf2nD,GACF35D,KAAK00D,cAAgB10D,KAAKopB,KAC1BppB,KAAKopB,KAAOppB,KAAKw0D,aAAaprC,KACvBppB,KAAKw0D,aAAaprC,MAEL,GAAbwwC,GACP55D,KAAK00D,cAAgB10D,KAAKqpB,GAC1BrpB,KAAKqpB,GAAKrpB,KAAKw0D,aAAanrC,GACrBrpB,KAAKw0D,aAAanrC,IAGlB,MASXjmB,EAAK+P,UAAU0mD,qBAAuB,WACG,GAAnC75D,KAAKw0D,aAAaprC,KAAKiqB,UACzBrzC,KAAKopB,KAAOppB,KAAK00D,cACjB10D,KAAK00D,cAAgB,KACrB10D,KAAKw0D,aAAaprC,KAAK8nB,YAEiB,GAAjClxC,KAAKw0D,aAAanrC,GAAGgqB,WAC5BrzC,KAAKqpB,GAAKrpB,KAAK00D,cACf10D,KAAK00D,cAAgB,KACrB10D,KAAKw0D,aAAanrC,GAAG6nB,aAUzB9tC,EAAK+P,UAAUmmD,2BAA6B,SAASvyC,GAEnD,GAAI+yC,EACJ,IAAyC,GAArC95D,KAAKyO,QAAQqyC,aAAapyC,QAC5BorD,EAAqB95D,KAAK43D,qBAAoB,EAAM7wC,OAEjD,CACH,GAAIqnC,GAAQlpD,KAAK4xD,MAAO92D,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAAKhS,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,GACrE6M,EAAM5e,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,EAC5B8M,EAAM7e,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAC5BmmD,EAAoBjzD,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAE7Ck7C,EAAiB/5D,KAAKopB,KAAKyuC,iBAAiB9wC,EAAKqnC,EAAQlpD,KAAKymB,IAC9DquC,GAAmB7B,EAAoB4B,GAAkB5B,CAC7D2B,MACAA,EAAmB/nD,EAAI,EAAoB/R,KAAKopB,KAAKrX,GAAK,EAAIioD,GAAmBh6D,KAAKqpB,GAAGtX,EACzF+nD,EAAmB9nD,EAAI,EAAoBhS,KAAKopB,KAAKpX,GAAK,EAAIgoD,GAAmBh6D,KAAKqpB,GAAGrX,EAG3F,MAAO8nD,IAST12D,EAAK+P,UAAUomD,yBAA2B,SAASxyC,GAEjD,GAAuBkzC,EACvB,IAAyC,GAArCj6D,KAAKyO,QAAQqyC,aAAapyC,QAC5BurD,EAAmBj6D,KAAK43D,qBAAoB,EAAO7wC,OAEhD,CACH,GAAIqnC,GAAQlpD,KAAK4xD,MAAO92D,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAAKhS,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,GACrE6M,EAAM5e,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,EAC5B8M,EAAM7e,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAC5BmmD,EAAoBjzD,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAC7Cu5C,EAAep4D,KAAKqpB,GAAGwuC,iBAAiB9wC,EAAKqnC,GAC7CiK,GAAiBF,EAAoBC,GAAgBD,CAEzD8B,MACAA,EAAiBloD,GAAK,EAAIsmD,GAAiBr4D,KAAKopB,KAAKrX,EAAIsmD,EAAgBr4D,KAAKqpB,GAAGtX,EACjFkoD,EAAiBjoD,GAAK,EAAIqmD,GAAiBr4D,KAAKopB,KAAKpX,EAAIqmD,EAAgBr4D,KAAKqpB,GAAGrX,EAGnF,MAAOioD,IAGTp6D,EAAOD,QAAUwD,GAIb,SAASvD,EAAQD,EAASM,GAQ9B,QAASmD,KACPrD,KAAKyW,QACLzW,KAAKk6D,aAAe,EARXh6D,EAAoB,EAe/BmD,GAAO82D,UACJ/tD,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,
WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aAO3I9I,EAAO8P,UAAUsD,MAAQ,WACvBzW,KAAKm0B,UACLn0B,KAAKm0B,OAAOxuB,OAAS,WAEnB,GAAIH,GAAI,CACR,KAAM,GAAI9E,KAAKV,MACTA,KAAK8F,eAAepF,IACtB8E,GAGJ,OAAOA,KAWXnC,EAAO8P,UAAU+B,IAAM,SAAUkzC,GAC/B,GAAIn2C,GAAQjS,KAAKm0B,OAAOi0B,EACxB,IAAa5hD,QAATyL,EAAoB,CAEtB,GAAI9J,GAAQnI,KAAKk6D,aAAe72D,EAAO82D,QAAQx0D,MAC/C3F,MAAKk6D,eACLjoD,KACAA,EAAM9G,MAAQ9H,EAAO82D,QAAQhyD,GAC7BnI,KAAKm0B,OAAOi0B,GAAan2C,EAG3B,MAAOA,IAUT5O,EAAO8P,UAAUF,IAAM,SAAUm1C,EAAWn7C,GAE1C,MADAjN,MAAKm0B,OAAOi0B,GAAan7C,EAClBA,GAGTpN,EAAOD,QAAUyD,GAKb,SAASxD,GAMb,QAASyD,KACPtD,KAAK2iD,UACL3iD,KAAKo6D,eACLp6D,KAAKsI,SAAW9B,OAQlBlD,EAAO6P,UAAUyvC,kBAAoB,SAASt6C,GAC5CtI,KAAKsI,SAAWA,GASlBhF,EAAO6P,UAAUknD,KAAO,SAASC,EAAKC,GACpC,GAAIC,GAAMx6D,KAAK2iD,OAAO2X,EACtB,IAAY9zD,SAARg0D,EAAmB,CAErB,GAAIrmD,GAAKnU,IACTw6D,GAAM,GAAIC,OACVD,EAAIE,OAAS,WAEO,GAAd16D,KAAKuS,QACPhB,SAASojB,KAAKljB,YAAYzR,MAC1BA,KAAKuS,MAAQvS,KAAKowB,YAClBpwB,KAAKwS,OAASxS,KAAKswB,aACnB/e,SAASojB,KAAKxjB,YAAYnR,OAGxBmU,EAAG7L,WACL6L,EAAGwuC,OAAO2X,GAAOE,EACjBrmD,EAAG7L,SAAStI,QAIhBw6D,EAAIG,QAAU,WACMn0D,SAAd+zD,GACF5hC,QAAQiiC,MAAM,wBAAyBN,SAChCt6D,MAAK4lD,IACRzxC,EAAG7L,UACL6L,EAAG7L,SAAStI,OAGPmU,EAAGimD,YAAYE,MAAS,GAC/B3hC,QAAQiiC,MAAM,8BAA+BL,SACtCv6D,MAAK4lD,IACRzxC,EAAG7L,UACL6L,EAAG7L,SAAStI,QAIdA,KAAK4lD,IAAM2U,EACXpmD,EAAGimD,YAAYE,IAAO,IAI1BE,EAAI5U,IAAM0U,EAGZ,MAAOE,IAGT36D,EAAOD,QAAU0D,GAKb,SAASzD,EAAQD,EAASM,GA6B9B,QAASqD,GAAKorD,EAAYkM,EAAWC,EAAWpH,GAC9C,GAAIhS,GAAY/gD,EAAKsN,uBAAuB,SAASylD,EACrD1zD,MAAKyO,QAAUizC,EAAU1E,MAEzBh9C,KAAKqzC,UAAW,EAChBrzC,KAAKsM,OAAQ,EAEbtM,KAAK89C,SACL99C,KAAKmvD,gBACLnvD,KAAK+6D,iBAEL/6D,KAAKg7D,kBAAoB,EAGzBh7D,KAAKK,GAAKmG,OACVxG,KAAKwyD,gBAAiB,EACtBxyD,KAAKyyD,gBAAiB,EACtBzyD,KAAKmrD,QAAS,EACdnrD,KAAKorD,QAAS,EACdprD,KAAKi7D,qBAAsB,EAC3Bj7D,KAAKk7D,kBAAsB,EAC3Bl7D,KAAKm7D,gBAAkBzH,EAAiB1W,MAAMvxB,OAC9CzrB,KAAKo7D,aAAc,EACnBp7D,KAAK49C,MAAQ,GACb59C,KAAKq7D,kBAAmB,EACxBr7D,KAAKs7D,qBAAsB,EAC3Bt7D,KAAK8zD,iBAAmBnsD,IAAI,EAAGF,KAAK,EAAG8K,MAAM,EAAGC,OAAO,EAAGuhD,MAAM,GAChE/zD,KAAKomD,aAAez+C,IAAI,EAAGF,KAAK,EAAG4f,MAAM,EAAG/D,OAAO,GAEnDtjB,KAAK66D,UAAYA,EACjB76D,KAAK86D,UAAYA,EAGjB96D,KAAKu7D,GAAK,EACVv7D,KAAKw7D,GAAK,EACVx7D,KAAKy7D,GAAK,EACVz7D,KAAK07D,GAAK,EACV17D,KAAK+R,EAAI,KACT/R,KAAKgS,EAAI,KAGThS,KAAK27D,eAAiBF,GAAG,EAAEC,GAAG,EAAE3pD,EAAE,EAAEC,EAAE,GAEtChS,KAAK++C,QAAU2U,EAAiBlV,QAAQO,QACxC/+C,KAAKswD,WAAav+C,EAAE,KAAKC,EAAE,MAE3BhS,KAAK0uD,cAAcC,EAAYjN,GAG/B1hD,KAAK47D,eACL57D,KAAK67D,mBAAqB,EAC1B77D,KAAK87D,eAAiB,EACtB97D,KAAK+7D,uBAA0BrI,EAAiBvU,WAAWa,YAAYztC,MACvEvS,KAAKg8D,wBAA0BtI,EAAiBvU,WAAWa,YAAYxtC,OACvExS,KAAKi8D,wBAA0BvI,EAAiBvU,WAAWa,YAAYv0B,OACvEzrB,KAAKigD,sBAAwByT,EAAiBvU,WAAWc,sBACzDjgD,KAAKk8D,gBAAkB,EAGvBl8D,KAAKm2D,gBAAkB,EACvBn2D,KAAKm8D,aAAe,EACpBn8D,KAAK+jD,eAAiBhyC,EAAK,KAAMC,EAAK,MACtChS,KAAKgkD,mBAAqBjyC,EAAM,IAAKC,EAAM,KAC3ChS,KAAKiyD,aAAe,KA1FtB,GAAItxD,GAAOT,EAAoB,EAiG/BqD,GAAK4P,UAAU69C,eAAiB,WAC9BhxD,KAAK+R,EAAI/R,KAAK27D,cAAc5pD,EAC5B/R,KAAKgS,EAAIhS,KAAK27D,cAAc3pD,EAC5BhS,KAAKy7D,GAAKz7D,KAAK27D,cAAcF,GAC7Bz7D,KAAK07D,GAAK17D,KAAK27D,cAAcD,IAO/Bn4D,EAAK4P,UAAUyoD,aAAe,WAE5B57D,KAAKo8D,eAAiB51D,OACtBxG,KAAKq8D,YAAc,EACnBr8D,KAAKs8D,kBACLt8D,KAAKu8D,kBACLv8D,KAAKw8D,oBAOPj5D,EAAK4P,UAAU4hD,WAAa,SAASrH,GACH,IAA5B1tD,KAAK89C,MAAMn3C,QAAQ+mD,IACrB1tD,KAAK89C,MAAM91C,KAAK0lD,GAEqB,IAAnC1tD,KAAKmvD,aAAaxoD,QAAQ+mD,IAC5B1tD,KAAKmvD,aAAannD,KAAK0lD,GAEzB1tD,KAAK67D,mBAAqB77D,KAAKmvD,aAAaxpD,QAO9CpC,EAAK4P,UAAU6hD,WAAa,SAAStH,GACnC,
GAAIvlD,GAAQnI,KAAK89C,MAAMn3C,QAAQ+mD,EAClB,KAATvlD,GACFnI,KAAK89C,MAAM11C,OAAOD,EAAO,GAE3BA,EAAQnI,KAAKmvD,aAAaxoD,QAAQ+mD,GACrB,IAATvlD,GACFnI,KAAKmvD,aAAa/mD,OAAOD,EAAO,GAElCnI,KAAK67D,mBAAqB77D,KAAKmvD,aAAaxpD,QAS9CpC,EAAK4P,UAAUu7C,cAAgB,SAASC,EAAYjN,GAClD,GAAKiN,EAAL,CAIA,GAAIzgD,IAAU,cAAc,sBAAsB,QAAQ,QAAQ,cAAc,SAAS,YACvF,WAAW,WAAW,WAAW,kBAAkB,kBAAkB,QAAQ,OAkB/E,IAhBAvN,EAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASkgD,GAGzBnoD,SAAlBmoD,EAAWtuD,KAA0BL,KAAKK,GAAKsuD,EAAWtuD,IACrCmG,SAArBmoD,EAAWlmC,QAA0BzoB,KAAKyoB,MAAQkmC,EAAWlmC,MAAOzoB,KAAKy8D,cAAgB9N,EAAWlmC,OAC/EjiB,SAArBmoD,EAAW3pB,QAA0BhlC,KAAKglC,MAAQ2pB,EAAW3pB,OAC5Cx+B,SAAjBmoD,EAAW58C,IAA0B/R,KAAK+R,EAAI48C,EAAW58C,GACxCvL,SAAjBmoD,EAAW38C,IAA0BhS,KAAKgS,EAAI28C,EAAW38C,GACpCxL,SAArBmoD,EAAWtnD,QAA0BrH,KAAKqH,MAAQsnD,EAAWtnD,OACxCb,SAArBmoD,EAAW/Q,QAA0B59C,KAAK49C,MAAQ+Q,EAAW/Q,MAAO59C,KAAKq7D,kBAAmB,GAGzD70D,SAAnCmoD,EAAWsM,sBAAoCj7D,KAAKi7D,oBAAsBtM,EAAWsM,qBAClDz0D,SAAnCmoD,EAAWuM,mBAAoCl7D,KAAKk7D,iBAAsBvM,EAAWuM,kBAClD10D,SAAnCmoD,EAAW+N,kBAAoC18D,KAAK08D,gBAAsB/N,EAAW+N,iBAEzEl2D,SAAZxG,KAAKK,GACP,KAAM,sBAIR,IAAkC,gBAAvBL,MAAKyO,QAAQwD,OAAqD,gBAAvBjS,MAAKyO,QAAQwD,OAA4C,IAAtBjS,KAAKyO,QAAQwD,MAAc,CAClH,GAAI0qD,GAAW38D,KAAK86D,UAAU5lD,IAAIlV,KAAKyO,QAAQwD,MAC/CtR,GAAK8F,WAAWzG,KAAKyO,QAASkuD,GAE9B38D,KAAKyO,QAAQtD,MAAQxK,EAAKuK,WAAWlL,KAAKyO,QAAQtD,OAMpD,GAF0B3E,SAAtBmoD,EAAWljC,SAA+BzrB,KAAKm7D,gBAAkBn7D,KAAKyO,QAAQgd,QACzDjlB,SAArBmoD,EAAWxjD,QAA+BnL,KAAKyO,QAAQtD,MAAQxK,EAAKuK,WAAWyjD,EAAWxjD,QACnE3E,SAAvBxG,KAAKyO,QAAQ4uC,OAA4C,IAArBr9C,KAAKyO,QAAQ4uC,MAAY,CAC/D,IAAIr9C,KAAK66D,UAIP,KAAM,uBAHN76D,MAAK48D,SAAW58D,KAAK66D,UAAUR,KAAKr6D,KAAKyO,QAAQ4uC,MAAOr9C,KAAKyO,QAAQouD,aAgCzE,OAzBkCr2D,SAA9BmoD,EAAW6D,gBACbxyD,KAAKmrD,QAAUwD,EAAW6D,eAC1BxyD,KAAKwyD,eAAiB7D,EAAW6D,gBAEThsD,SAAjBmoD,EAAW58C,GAA0C,GAAvB/R,KAAKwyD,iBAC1CxyD,KAAKmrD,QAAS,GAIkB3kD,SAA9BmoD,EAAW8D,gBACbzyD,KAAKorD,QAAUuD,EAAW8D,eAC1BzyD,KAAKyyD,eAAiB9D,EAAW8D,gBAETjsD,SAAjBmoD,EAAW38C,GAA0C,GAAvBhS,KAAKyyD,iBAC1CzyD,KAAKorD,QAAS,GAGhBprD,KAAKo7D,YAAcp7D,KAAKo7D,aAAsC50D,SAAtBmoD,EAAWljC,QAExB,UAAvBzrB,KAAKyO,QAAQ2uC,OAA4C,kBAAvBp9C,KAAKyO,QAAQ2uC,SACjDp9C,KAAKyO,QAAQyuC,UAAYwE,EAAU1E,MAAM91B,SACzClnB,KAAKyO,QAAQ0uC,UAAYuE,EAAU1E,MAAM71B,UAInCnnB,KAAKyO,QAAQ2uC,OACnB,IAAK,WAAiBp9C,KAAK+rC,KAAO/rC,KAAK88D,cAAe98D,KAAKg2D,OAASh2D,KAAK+8D,eAAiB,MAC1F,KAAK,MAAiB/8D,KAAK+rC,KAAO/rC,KAAKg9D,SAAUh9D,KAAKg2D,OAASh2D,KAAKi9D,UAAY,MAChF,KAAK,SAAiBj9D,KAAK+rC,KAAO/rC,KAAKk9D,YAAal9D,KAAKg2D,OAASh2D,KAAKm9D,aAAe,MACtF,KAAK,UAAiBn9D,KAAK+rC,KAAO/rC,KAAKo9D,aAAcp9D,KAAKg2D,OAASh2D,KAAKq9D,cAAgB,MAExF,KAAK,QAAiBr9D,KAAK+rC,KAAO/rC,KAAKs9D,WAAYt9D,KAAKg2D,OAASh2D,KAAKu9D,YAAc,MACpF,KAAK,gBAAiBv9D,KAAK+rC,KAAO/rC,KAAKw9D,mBAAoBx9D,KAAKg2D,OAASh2D,KAAKy9D,oBAAsB,MACpG,KAAK,OAAiBz9D,KAAK+rC,KAAO/rC,KAAK09D,UAAW19D,KAAKg2D,OAASh2D,KAAK29D,WAAa,MAClF,KAAK,MAAiB39D,KAAK+rC,KAAO/rC,KAAK49D,SAAU59D,KAAKg2D,OAASh2D,KAAK69D,YAAc,MAClF,KAAK,SAAiB79D,KAAK+rC,KAAO/rC,KAAK89D,YAAa99D,KAAKg2D,OAASh2D,KAAK69D,YAAc,MACrF,KAAK,WAAiB79D,KAAK+rC,KAAO/rC,KAAK+9D,cAAe/9D,KAAKg2D,OAASh2D,KAAK69D,YAAc,MACvF,KAAK,eAAiB79D,KAAK+rC,KAAO/rC,KAAKg+D,kBAAmBh+D,KAAKg2D,OAASh2D,KAAK69D,YAAc,MAC3F,KAAK,OAAiB79D,KAAK+rC,KAAO/rC,KAAKi+D,UAAWj+D,KAAKg2D,OAASh2D,KAAK69D,YAAc,MACnF,SAAsB79D,KAAK+rC,KAAO/rC,KAAKo9D,aAAcp9D,KAAKg2D,OAASh2D,KAAKq9D,eAG1Er9D,KAAKk+D,WAOP36D,EAAK4P,UAAUg+B,OAAS,WACtBnxC,KAAKqzC,UAAW,EAChBrzC,KAAKk+D,UAMP36D,EAAK4P,UAAU+9B,SAAW,WACxBlxC,KAAKqzC,UAAW,EAChBrzC,KAAKk+D,UAOP36D,EAAK4P,UAAUgrD,eAAiB,WAC9Bn+D,KAAKk+D,UAOP36D,EAAK4P,UAAU+qD,OAAS,WACtBl+D,KAAKuS,MAAQ/L,OACbxG,KAAKwS,OAAShM,QAQhBjD,E
AAK4P,UAAUq6C,SAAW,WACxB,MAA6B,kBAAfxtD,MAAKglC,MAAuBhlC,KAAKglC,QAAUhlC,KAAKglC,OAShEzhC,EAAK4P,UAAU0kD,iBAAmB,SAAU9wC,EAAKqnC,GAC/C,GAAIpuC,GAAc,CAMlB,QAJKhgB,KAAKuS,OACRvS,KAAKg2D,OAAOjvC,GAGN/mB,KAAKyO,QAAQ2uC,OACnB,IAAK,SACL,IAAK,MACH,MAAOp9C,MAAKyO,QAAQgd,OAAQzL,CAE9B,KAAK,UACH,GAAIza,GAAIvF,KAAKuS,MAAQ,EACjBnM,EAAIpG,KAAKwS,OAAS,EAClB68C,EAAKnqD,KAAKkZ,IAAIgwC,GAAS7oD,EACvBoG,EAAKzG,KAAKqZ,IAAI6vC,GAAShoD,CAC3B,OAAOb,GAAIa,EAAIlB,KAAKyqB,KAAK0/B,EAAIA,EAAI1jD,EAAIA,EAMvC,KAAK,MACL,IAAK,QACL,IAAK,OACL,QACE,MAAI3L,MAAKuS,MACArN,KAAK4G,IACR5G,KAAK2lB,IAAI7qB,KAAKuS,MAAQ,EAAIrN,KAAKqZ,IAAI6vC,IACnClpD,KAAK2lB,IAAI7qB,KAAKwS,OAAS,EAAItN,KAAKkZ,IAAIgwC,KAAWpuC,EAI5C,IAYfzc,EAAK4P,UAAUirD,UAAY,SAAS7C,EAAIC,GACtCx7D,KAAKu7D,GAAKA,EACVv7D,KAAKw7D,GAAKA,GASZj4D,EAAK4P,UAAUkrD,UAAY,SAAS9C,EAAIC,GACtCx7D,KAAKu7D,IAAMA,EACXv7D,KAAKw7D,IAAMA,GAMbj4D,EAAK4P,UAAUmrD,WAAa,WAC1Bt+D,KAAK27D,cAAc5pD,EAAI/R,KAAK+R,EAC5B/R,KAAK27D,cAAc3pD,EAAIhS,KAAKgS,EAC5BhS,KAAK27D,cAAcF,GAAKz7D,KAAKy7D,GAC7Bz7D,KAAK27D,cAAcD,GAAK17D,KAAK07D,IAO/Bn4D,EAAK4P,UAAU09C,aAAe,SAASr+B,GAErC,GADAxyB,KAAKs+D,aACAt+D,KAAKmrD,OAORnrD,KAAKu7D,GAAK,EACVv7D,KAAKy7D,GAAK,MARM,CAChB,GAAI78C,GAAO5e,KAAK++C,QAAU/+C,KAAKy7D,GAC3B79C,GAAQ5d,KAAKu7D,GAAK38C,GAAM5e,KAAKyO,QAAQwuC,IACzCj9C,MAAKy7D,IAAM79C,EAAK4U,EAChBxyB,KAAK+R,GAAM/R,KAAKy7D,GAAKjpC,EAOvB,GAAKxyB,KAAKorD,OAORprD,KAAKw7D,GAAK,EACVx7D,KAAK07D,GAAK,MARM,CAChB,GAAI78C,GAAO7e,KAAK++C,QAAU/+C,KAAK07D,GAC3B79C,GAAQ7d,KAAKw7D,GAAK38C,GAAM7e,KAAKyO,QAAQwuC,IACzCj9C,MAAK07D,IAAM79C,EAAK2U,EAChBxyB,KAAKgS,GAAMhS,KAAK07D,GAAKlpC,IAezBjvB,EAAK4P,UAAUy9C,oBAAsB,SAASp+B,EAAUyuB,GAEtD,GADAjhD,KAAKs+D,aACAt+D,KAAKmrD,OAQRnrD,KAAKu7D,GAAK,EACVv7D,KAAKy7D,GAAK,MATM,CAChB,GAAI78C,GAAO5e,KAAK++C,QAAU/+C,KAAKy7D,GAC3B79C,GAAQ5d,KAAKu7D,GAAK38C,GAAM5e,KAAKyO,QAAQwuC,IACzCj9C,MAAKy7D,IAAM79C,EAAK4U,EAChBxyB,KAAKy7D,GAAMv2D,KAAK2lB,IAAI7qB,KAAKy7D,IAAMxa,EAAiBjhD,KAAKy7D,GAAK,EAAKxa,GAAeA,EAAejhD,KAAKy7D,GAClGz7D,KAAK+R,GAAM/R,KAAKy7D,GAAKjpC,EAOvB,GAAKxyB,KAAKorD,OAQRprD,KAAKw7D,GAAK,EACVx7D,KAAK07D,GAAK,MATM,CAChB,GAAI78C,GAAO7e,KAAK++C,QAAU/+C,KAAK07D,GAC3B79C,GAAQ7d,KAAKw7D,GAAK38C,GAAM7e,KAAKyO,QAAQwuC,IACzCj9C,MAAK07D,IAAM79C,EAAK2U,EAChBxyB,KAAK07D,GAAMx2D,KAAK2lB,IAAI7qB,KAAK07D,IAAMza,EAAiBjhD,KAAK07D,GAAK,EAAKza,GAAeA,EAAejhD,KAAK07D,GAClG17D,KAAKgS,GAAMhS,KAAK07D,GAAKlpC,IAYzBjvB,EAAK4P,UAAUorD,QAAU,WACvB,MAAQv+D,MAAKmrD,QAAUnrD,KAAKorD,QAQ9B7nD,EAAK4P,UAAUs9C,SAAW,SAASD,GACjC,GAAIgO,GAAWt5D,KAAKyqB,KAAKzqB,KAAK4uB,IAAI9zB,KAAKy7D,GAAG,GAAKv2D,KAAK4uB,IAAI9zB,KAAK07D,GAAG,GAEhE,OAAQ8C,GAAWhO,GAOrBjtD,EAAK4P,UAAU23C,WAAa,WAC1B,MAAO9qD,MAAKqzC,UAOd9vC,EAAK4P,UAAUyB,SAAW,WACxB,MAAO5U,MAAKqH,OASd9D,EAAK4P,UAAUsrD,YAAc,SAAS1sD,EAAGC,GACvC,GAAI4M,GAAK5e,KAAK+R,EAAIA,EACd8M,EAAK7e,KAAKgS,EAAIA,CAClB,OAAO9M,MAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,IAUlCtb,EAAK4P,UAAUi8C,cAAgB,SAAStjD,EAAKY,GAC3C,IAAK1M,KAAKo7D,aAA8B50D,SAAfxG,KAAKqH,MAC5B,GAAIqF,GAAOZ,EACT9L,KAAKyO,QAAQgd,QAASzrB,KAAKyO,QAAQyuC,UAAYl9C,KAAKyO,QAAQ0uC,WAAa,MAEtE,CACH,GAAIlgC,IAASjd,KAAKyO,QAAQ0uC,UAAYn9C,KAAKyO,QAAQyuC,YAAcxwC,EAAMZ,EACvE9L,MAAKyO,QAAQgd,QAASzrB,KAAKqH,MAAQyE,GAAOmR,EAAQjd,KAAKyO,QAAQyuC,UAGnEl9C,KAAKm7D,gBAAkBn7D,KAAKyO,QAAQgd,QAQtCloB,EAAK4P,UAAU44B,KAAO,WACpB,KAAM,wCAQRxoC,EAAK4P,UAAU6iD,OAAS,WACtB,KAAM,0CAQRzyD,EAAK4P,UAAUo6C,kBAAoB,SAASxqC,GAC1C,MAAQ/iB,MAAKyH,KAAoBsb,EAAIsE,OAC7BrnB,KAAKyH,KAAOzH,KAAKuS,MAAQwQ,EAAItb,MAC7BzH,KAAK2H,IAAoBob,EAAIO,QAC7BtjB,KAAK2H,IAAM3H,KAAKwS,OAASuQ,EAAIpb,KAGvCpE,EAAK4P,UAAUoqD,aAAe,WAG5B,IAAKv9D,KAAKuS,QAAUvS,KAAKwS,OAAQ,CAC/B,GAAID,GAAOC,CACX,IAAIxS,KAAKqH,MAAO,CACdrH,KAAKyO,QAAQ
gd,OAAQzrB,KAAKm7D,eAC1B,IAAIl+C,GAAQjd,KAAK48D,SAASpqD,OAASxS,KAAK48D,SAASrqD,KACnC/L,UAAVyW,GACF1K,EAAQvS,KAAKyO,QAAQgd,QAASzrB,KAAK48D,SAASrqD,MAC5CC,EAASxS,KAAKyO,QAAQgd,OAAQxO,GAASjd,KAAK48D,SAASpqD,SAGrDD,EAAQ,EACRC,EAAS,OAIXD,GAAQvS,KAAK48D,SAASrqD,MACtBC,EAASxS,KAAK48D,SAASpqD,MAEzBxS,MAAKuS,MAASA,EACdvS,KAAKwS,OAASA,EAEdxS,KAAKk8D,gBAAkB,EACnBl8D,KAAKuS,MAAQ,GAAKvS,KAAKwS,OAAS,IAClCxS,KAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA0BjgD,KAAK+7D,uBAClF/7D,KAAKwS,QAAUtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAASvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKi8D,wBACxFj8D,KAAKk8D,gBAAkBl8D,KAAKuS,MAAQA,KAK1ChP,EAAK4P,UAAUurD,qBAAuB,SAAU33C,GAC9C,GAA2B,GAAvB/mB,KAAK48D,SAASrqD,MAAa,CAE7B,GAAIvS,KAAKq8D,YAAc,EAAG,CACxB,GAAI/0C,GAActnB,KAAKq8D,YAAc,EAAK,GAAK,CAC/C/0C,IAAatnB,KAAKm2D,gBAClB7uC,EAAYpiB,KAAK4G,IAAI,GAAM9L,KAAKuS,MAAM+U,GAEtCP,EAAI43C,YAAc,GAClB53C,EAAI63C,UAAU5+D,KAAK48D,SAAU58D,KAAKyH,KAAO6f,EAAWtnB,KAAK2H,IAAM2f,EAAWtnB,KAAKuS,MAAQ,EAAE+U,EAAWtnB,KAAKwS,OAAS,EAAE8U,GAItHP,EAAI43C,YAAc,EAClB53C,EAAI63C,UAAU5+D,KAAK48D,SAAU58D,KAAKyH,KAAMzH,KAAK2H,IAAK3H,KAAKuS,MAAOvS,KAAKwS,UAIvEjP,EAAK4P,UAAU0rD,gBAAkB,SAAU93C,GACzC,GAAIjN,GACA6P,EAAS,CAEb,IAAI3pB,KAAKwS,OAAO,CACdmX,EAAS3pB,KAAKwS,OAAS,CACvB,IAAIshD,GAAkB9zD,KAAK8+D,YAAY/3C,EAEnC+sC,GAAgB0C,WAAa,IAC/B7sC,GAAUmqC,EAAgBthD,OAAS,EACnCmX,GAAU,GAId7P,EAAS9Z,KAAKgS,EAAI2X,EAElB3pB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG+H,EAAQtT,SAG/CjD,EAAK4P,UAAUmqD,WAAa,SAAUv2C,GACpC/mB,KAAKu9D,aAAax2C,GAClB/mB,KAAKyH,KAASzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EACpCvS,KAAK2H,IAAS3H,KAAKgS,EAAIhS,KAAKwS,OAAS,EAErCxS,KAAK0+D,qBAAqB33C,GAE1B/mB,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,OAE1CxS,KAAK6+D,gBAAgB93C,GACrB/mB,KAAKomD,YAAY3+C,KAAOvC,KAAK4G,IAAI9L,KAAKomD,YAAY3+C,KAAMzH,KAAK8zD,gBAAgBrsD,MAC7EzH,KAAKomD,YAAY/+B,MAAQniB,KAAKwH,IAAI1M,KAAKomD,YAAY/+B,MAAOrnB,KAAK8zD,gBAAgBrsD,KAAOzH,KAAK8zD,gBAAgBvhD,OAC3GvS,KAAKomD,YAAY9iC,OAASpe,KAAKwH,IAAI1M,KAAKomD,YAAY9iC,OAAQtjB,KAAKomD,YAAY9iC,OAAStjB,KAAK8zD,gBAAgBthD,SAG7GjP,EAAK4P,UAAUsqD,qBAAuB,SAAU12C,GAC9C,GAAI/mB,KAAK48D,SAAShX,KAAQ5lD,KAAK48D,SAASrqD,OAAUvS,KAAK48D,SAASpqD,OAe1DxS,KAAK++D,oCACP/+D,KAAKuS,MAAQ,EACbvS,KAAKwS,OAAS,QACPxS,MAAK++D,mCAEd/+D,KAAKu9D,aAAax2C,OAnBlB,KAAK/mB,KAAKuS,MAAO,CACf,GAAIysD,GAAiC,EAAtBh/D,KAAKyO,QAAQgd,MAC5BzrB,MAAKuS,MAAQysD,EACbh/D,KAAKwS,OAASwsD,EAKdh/D,KAAKyO,QAAQgd,QAAuE,GAA7DvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAKi8D,wBAC/Fj8D,KAAKk8D,gBAAkBl8D,KAAKyO,QAAQgd,OAAQ,GAAIuzC,EAChDh/D,KAAK++D,mCAAoC,IAc/Cx7D,EAAK4P,UAAUqqD,mBAAqB,SAAUz2C,GAC5C/mB,KAAKy9D,qBAAqB12C,GAE1B/mB,KAAKyH,KAASzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EACpCvS,KAAK2H,IAAS3H,KAAKgS,EAAIhS,KAAKwS,OAAS,CAErC,IAAIysD,GAAUj/D,KAAKyH,KAAQzH,KAAKuS,MAAQ,EACpC2sD,EAAUl/D,KAAK2H,IAAO3H,KAAKwS,OAAS,EACpCiZ,EAASvmB,KAAK2lB,IAAI7qB,KAAKwS,OAAS,EAEpCxS,MAAKm/D,eAAep4C,EAAKk4C,EAASC,EAASzzC,GAE3C1E,EAAIuoC,OACJvoC,EAAIq4C,OAAOp/D,KAAK+R,EAAG/R,KAAKgS,EAAGyZ,GAC3B1E,EAAIlH,SACJkH,EAAIs4C,OAEJr/D,KAAK0+D,qBAAqB33C,GAE1BA,EAAI0oC,UAEJzvD,KAAKomD,YAAYz+C,IAAM3H,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAC7CzrB,KAAKomD,YAAY3+C,KAAOzH,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC9CzrB,KAAKomD,YAAY/+B,MAAQrnB,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC/CzrB,KAAKomD,YAAY9iC,OAAStjB,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAEhDzrB,KAAK6+D,gBAAgB93C,GAErB/mB,KAAKomD,YAAY3+C,KAAOvC,KAAK4G,IAAI9L,KAAKomD,YAAY3+C,KAAMzH,KAAK8zD,gBAAgBrsD,MAC7EzH,
KAAKomD,YAAY/+B,MAAQniB,KAAKwH,IAAI1M,KAAKomD,YAAY/+B,MAAOrnB,KAAK8zD,gBAAgBrsD,KAAOzH,KAAK8zD,gBAAgBvhD,OAC3GvS,KAAKomD,YAAY9iC,OAASpe,KAAKwH,IAAI1M,KAAKomD,YAAY9iC,OAAQtjB,KAAKomD,YAAY9iC,OAAStjB,KAAK8zD,gBAAgBthD,SAG7GjP,EAAK4P,UAAU8pD,WAAa,SAAUl2C,GACpC,IAAK/mB,KAAKuS,MAAO,CACf,GAAImH,GAAS,EACT4lD,EAAWt/D,KAAK8+D,YAAY/3C,EAChC/mB,MAAKuS,MAAQ+sD,EAAS/sD,MAAQ,EAAImH,EAClC1Z,KAAKwS,OAAS8sD,EAAS9sD,OAAS,EAAIkH,EAEpC1Z,KAAKuS,OAAuE,GAA7DrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAK+7D,uBACvF/7D,KAAKwS,QAAuE,GAA7DtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAKg8D,wBACvFh8D,KAAKk8D,gBAAkBl8D,KAAKuS,OAAS+sD,EAAS/sD,MAAQ,EAAImH,KAM9DnW,EAAK4P,UAAU6pD,SAAW,SAAUj2C,GAClC/mB,KAAKi9D,WAAWl2C,GAEhB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,CAElC,IAAI+sD,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAAuB,EAAI79C,KAAKyO,QAAQuR,WAE9E+G,GAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAGtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAI04C,UAAUz/D,KAAKyH,KAAK,EAAEsf,EAAIO,UAAWtnB,KAAK2H,IAAI,EAAEof,EAAIO,UAAWtnB,KAAKuS,MAAM,EAAEwU,EAAIO,UAAWtnB,KAAKwS,OAAO,EAAEuU,EAAIO,UAAWtnB,KAAKyO,QAAQgd,QACzI1E,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAEhJ4a,EAAI04C,UAAUz/D,KAAKyH,KAAMzH,KAAK2H,IAAK3H,KAAKuS,MAAOvS,KAAKwS,OAAQxS,KAAKyO,QAAQgd,QACzE1E,EAAInH,OACJmH,EAAIlH,SAEJ7f,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,OAE1CxS,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,IAI5CzO,EAAK4P,UAAU4pD,gBAAkB,SAAUh2C,GACzC,IAAK/mB,KAAKuS,MAAO,CACf,GAAImH,GAAS,EACT4lD,EAAWt/D,KAAK8+D,YAAY/3C,GAC5B1U,EAAOitD,EAAS/sD,MAAQ,EAAImH,CAChC1Z,MAAKuS,MAAQF,EACbrS,KAAKwS,OAASH,EAGdrS,KAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAK+7D,uBACjF/7D,KAAKwS,QAAUtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAASvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKi8D,wBACxFj8D,KAAKk8D,gBAAkBl8D,KAAKuS,MAAQF,IAIxC9O,EAAK4P,UAAU2pD,cAAgB,SAAU/1C,GACvC/mB,KAAK+8D,gBAAgBh2C,GACrB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,CAElC,IAAI+sD,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAAuB,EAAI79C,KAAKyO,QAAQuR,WAE9E+G,GAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAGtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAI24C,SAAS1/D,KAAK+R,EAAI/R,KAAKuS,MAAM,EAAI,EAAEwU,EAAIO,UAAWtnB,KAAKgS,EAAgB,GAAZhS,KAAKwS,OAAa,EAAEuU,EAAIO,UAAWtnB,KAAKuS,MAAQ,EAAEwU,EAAIO,UAAWtnB,KAAKwS,OAAS,EAAEuU,EAAIO,WACpJP,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EA
AIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAChJ4a,EAAI24C,SAAS1/D,KAAK+R,EAAI/R,KAAKuS,MAAM,EAAGvS,KAAKgS,EAAgB,GAAZhS,KAAKwS,OAAYxS,KAAKuS,MAAOvS,KAAKwS,QAC/EuU,EAAInH,OACJmH,EAAIlH,SAEJ7f,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,OAE1CxS,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS;EAI5CzO,EAAK4P,UAAUgqD,cAAgB,SAAUp2C,GACvC,IAAK/mB,KAAKuS,MAAO,CACf,GAAImH,GAAS,EACT4lD,EAAWt/D,KAAK8+D,YAAY/3C,GAC5Bi4C,EAAW95D,KAAKwH,IAAI4yD,EAAS/sD,MAAO+sD,EAAS9sD,QAAU,EAAIkH,CAC/D1Z,MAAKyO,QAAQgd,OAASuzC,EAAW,EAEjCh/D,KAAKuS,MAAQysD,EACbh/D,KAAKwS,OAASwsD,EAKdh/D,KAAKyO,QAAQgd,QAAuE,GAA7DvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAKi8D,wBAC/Fj8D,KAAKk8D,gBAAkBl8D,KAAKyO,QAAQgd,OAAQ,GAAIuzC,IAIpDz7D,EAAK4P,UAAUgsD,eAAiB,SAAUp4C,EAAKhV,EAAGC,EAAGyZ,GACnD,GAAI8zC,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAAuB,EAAI79C,KAAKyO,QAAQuR,WAE9E+G,GAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAGtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIq4C,OAAOrtD,EAAGC,EAAGyZ,EAAO,EAAE1E,EAAIO,WAC9BP,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAChJ4a,EAAIq4C,OAAOp/D,KAAK+R,EAAG/R,KAAKgS,EAAGyZ,GAC3B1E,EAAInH,OACJmH,EAAIlH,UAGNtc,EAAK4P,UAAU+pD,YAAc,SAAUn2C,GACrC/mB,KAAKm9D,cAAcp2C,GACnB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,EAElCxS,KAAKm/D,eAAep4C,EAAK/mB,KAAK+R,EAAG/R,KAAKgS,EAAGhS,KAAKyO,QAAQgd,QAEtDzrB,KAAKomD,YAAYz+C,IAAM3H,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAC7CzrB,KAAKomD,YAAY3+C,KAAOzH,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC9CzrB,KAAKomD,YAAY/+B,MAAQrnB,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC/CzrB,KAAKomD,YAAY9iC,OAAStjB,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAEhDzrB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,IAG5CzO,EAAK4P,UAAUkqD,eAAiB,SAAUt2C,GACxC,IAAK/mB,KAAKuS,MAAO,CACf,GAAI+sD,GAAWt/D,KAAK8+D,YAAY/3C,EAEhC/mB,MAAKuS,MAAyB,IAAjB+sD,EAAS/sD,MACtBvS,KAAKwS,OAA2B,EAAlB8sD,EAAS9sD,OACnBxS,KAAKuS,MAAQvS,KAAKwS,SACpBxS,KAAKuS,MAAQvS,KAAKwS,OAEpB,IAAImtD,GAAc3/D,KAAKuS,KAGvBvS,MAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAK+7D,uBACjF/7D,KAAKwS,QAAUtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAAUvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKi8D,wBACzFj8D,KAAKk8D,gBAAkBl8D,KAAKuS,MAAQotD,IAIxCp8D,EAAK4P,UAAUiqD,aAAe,SAAUr2C,GACtC/mB,KAAKq9D,eAAet2C,GACpB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,CAElC,IAAI+sD,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAAuB,EAAI79C,KAAKyO,QAAQuR,WAE9E+G,GAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAGtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAi
BhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAI64C,QAAQ5/D,KAAKyH,KAAK,EAAEsf,EAAIO,UAAWtnB,KAAK2H,IAAI,EAAEof,EAAIO,UAAWtnB,KAAKuS,MAAM,EAAEwU,EAAIO,UAAWtnB,KAAKwS,OAAO,EAAEuU,EAAIO,WAC/GP,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAEhJ4a,EAAI64C,QAAQ5/D,KAAKyH,KAAMzH,KAAK2H,IAAK3H,KAAKuS,MAAOvS,KAAKwS,QAClDuU,EAAInH,OACJmH,EAAIlH,SAEJ7f,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,OAE1CxS,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,IAG5CzO,EAAK4P,UAAUyqD,SAAW,SAAU72C,GAClC/mB,KAAK6/D,WAAW94C,EAAK,WAGvBxjB,EAAK4P,UAAU4qD,cAAgB,SAAUh3C,GACvC/mB,KAAK6/D,WAAW94C,EAAK,aAGvBxjB,EAAK4P,UAAU6qD,kBAAoB,SAAUj3C,GAC3C/mB,KAAK6/D,WAAW94C,EAAK,iBAGvBxjB,EAAK4P,UAAU2qD,YAAc,SAAU/2C,GACrC/mB,KAAK6/D,WAAW94C,EAAK,WAGvBxjB,EAAK4P,UAAU8qD,UAAY,SAAUl3C,GACnC/mB,KAAK6/D,WAAW94C,EAAK,SAGvBxjB,EAAK4P,UAAU0qD,aAAe,WAC5B,IAAK79D,KAAKuS,MAAO,CACfvS,KAAKyO,QAAQgd,OAAQzrB,KAAKm7D,eAC1B,IAAI9oD,GAAO,EAAIrS,KAAKyO,QAAQgd,MAC5BzrB,MAAKuS,MAAQF,EACbrS,KAAKwS,OAASH,EAGdrS,KAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAK+7D,uBACjF/7D,KAAKwS,QAAUtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAAsE,GAA7DvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAKi8D,wBAC9Fj8D,KAAKk8D,gBAAkBl8D,KAAKuS,MAAQF,IAIxC9O,EAAK4P,UAAU0sD,WAAa,SAAU94C,EAAKq2B,GACzCp9C,KAAK69D,aAAa92C,GAElB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,CAElC,IAAI+sD,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAAuB,EAAI79C,KAAKyO,QAAQuR,YAC1E8/C,EAAmB,CAGvB,QAAQ1iB,GACN,IAAK,MAAiB0iB,EAAmB,CAAG,MAC5C,KAAK,SAAiBA,EAAmB,CAAG,MAC5C,KAAK,WAAiBA,EAAmB,CAAG,MAC5C,KAAK,eAAiBA,EAAmB,CAAG,MAC5C,KAAK,OAAiBA,EAAmB,EAG3C/4C,EAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAEtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIq2B,GAAOp9C,KAAK+R,EAAG/R,KAAKgS,EAAGhS,KAAKyO,QAAQgd,OAAQq0C,EAAmB/4C,EAAIO,WACvEP,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAChJ4a,EAAIq2B,GAAOp9C,KAAK+R,EAAG/R,KAAKgS,EAAGhS,KAAKyO,QAAQgd,QACxC1E,EAAInH,OACJmH,EAAIlH,SAEJ7f,KAAKomD,YAAYz+C,IAAM3H,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAC7CzrB,KAAKomD,YAAY3+C,KAAOzH,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC9CzrB,KAAKomD,YAAY/+B,MAAQrnB,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC/CzrB,KAAKomD,YAAY9iC,OAAStjB,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAE5CzrB,KAAKyoB,QACPzoB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,EAAIhS,KAAKwS,OAAS,EAAGhM,OAAW,WAAU,GACpFxG,KAAKomD,YAAY3+C,KAAOvC,KAAK4G,IAAI9L,KAAKomD,YAAY3+C,KAAMzH,KAAK8zD,gBAAgBrsD,MAC7EzH,KAAKomD,YAAY/+B,MAAQniB,KAAKwH,IAAI1M,KAAKo
mD,YAAY/+B,MAAOrnB,KAAK8zD,gBAAgBrsD,KAAOzH,KAAK8zD,gBAAgBvhD,OAC3GvS,KAAKomD,YAAY9iC,OAASpe,KAAKwH,IAAI1M,KAAKomD,YAAY9iC,OAAQtjB,KAAKomD,YAAY9iC,OAAStjB,KAAK8zD,gBAAgBthD,UAI/GjP,EAAK4P,UAAUwqD,YAAc,SAAU52C,GACrC,IAAK/mB,KAAKuS,MAAO,CACf,GAAImH,GAAS,EACT4lD,EAAWt/D,KAAK8+D,YAAY/3C,EAChC/mB,MAAKuS,MAAQ+sD,EAAS/sD,MAAQ,EAAImH,EAClC1Z,KAAKwS,OAAS8sD,EAAS9sD,OAAS,EAAIkH,EAGpC1Z,KAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAK+7D,uBACjF/7D,KAAKwS,QAAUtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAASvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKi8D,wBACxFj8D,KAAKk8D,gBAAkBl8D,KAAKuS,OAAS+sD,EAAS/sD,MAAQ,EAAImH,KAI9DnW,EAAK4P,UAAUuqD,UAAY,SAAU32C,GACnC/mB,KAAK29D,YAAY52C,GACjB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,EAElCxS,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,GAE1ChS,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,QAI5CjP,EAAK4P,UAAU4iD,OAAS,SAAUhvC,EAAKwC,EAAMxX,EAAGC,EAAG+8B,EAAOgxB,EAAUC,GAClE,GAAIz2C,GAAQrlB,OAAOlE,KAAKyO,QAAQ8uC,UAAYv9C,KAAKm8D,aAAen8D,KAAKg7D,kBAAmB,CACtFj0C,EAAIQ,MAAQvnB,KAAKqzC,SAAW,QAAU,IAAMrzC,KAAKyO,QAAQ8uC,SAAW,MAAQv9C,KAAKyO,QAAQ+uC,QAEzF,IAAIhX,GAAQjd,EAAKxhB,MAAM,MACnByuD,EAAYhwB,EAAM7gC,OAClB43C,EAAWr5C,OAAOlE,KAAKyO,QAAQ8uC,UAC/BwW,EAAQ/hD,GAAK,EAAIwkD,GAAa,EAAIjZ,CAChB,IAAlByiB,IACFjM,EAAQ/hD,GAAK,EAAIwkD,IAAc,EAAIjZ,GAKrC,KAAK,GADDhrC,GAAQwU,EAAI0vC,YAAYjwB,EAAM,IAAIj0B,MAC7B/M,EAAI,EAAOgxD,EAAJhxD,EAAeA,IAAK,CAClC,GAAI8hB,GAAYP,EAAI0vC,YAAYjwB,EAAMhhC,IAAI+M,KAC1CA,GAAQ+U,EAAY/U,EAAQ+U,EAAY/U,EAE1C,GAAIC,GAASxS,KAAKyO,QAAQ8uC,SAAWiZ,EACjC/uD,EAAOsK,EAAIQ,EAAQ,EACnB5K,EAAMqK,EAAIQ,EAAS,CACP,YAAZutD,IACFp4D,GAAO,GAAM41C,EACb51C,GAAO,EACPosD,GAAS,GAEX/zD,KAAK8zD,iBAAmBnsD,IAAIA,EAAIF,KAAKA,EAAK8K,MAAMA,EAAMC,OAAOA,EAAOuhD,MAAMA,GAG5CvtD,SAA1BxG,KAAKyO,QAAQgvC,UAAoD,OAA1Bz9C,KAAKyO,QAAQgvC,UAA+C,SAA1Bz9C,KAAKyO,QAAQgvC,WACxF12B,EAAIiB,UAAYhoB,KAAKyO,QAAQgvC,SAC7B12B,EAAIkwC,SAASxvD,EAAME,EAAK4K,EAAOC,IAIjCuU,EAAIiB,UAAYhoB,KAAKyO,QAAQ6uC,WAAa,QAC1Cv2B,EAAIuB,UAAYymB,GAAS,SACzBhoB,EAAIwB,aAAew3C,GAAY,SAC3B//D,KAAKyO,QAAQivC,gBAAkB,IACjC32B,EAAIO,UAActnB,KAAKyO,QAAQivC,gBAC/B32B,EAAIY,YAAc3nB,KAAKyO,QAAQkvC,gBAC/B52B,EAAImwC,SAAc,QAEpB,KAAK,GAAI1xD,GAAI,EAAOgxD,EAAJhxD,EAAeA,IAC1BxF,KAAKyO,QAAQivC,iBACd32B,EAAIowC,WAAW3wB,EAAMhhC,GAAIuM,EAAGgiD,GAE9BhtC,EAAIyB,SAASge,EAAMhhC,GAAIuM,EAAGgiD,GAC1BA,GAASxW,IAMfh6C,EAAK4P,UAAU2rD,YAAc,SAAS/3C,GACpC,GAAmBvgB,SAAfxG,KAAKyoB,MAAqB,CAC5B1B,EAAIQ,MAAQvnB,KAAKqzC,SAAW,QAAU,IAAMrzC,KAAKyO,QAAQ8uC,SAAW,MAAQv9C,KAAKyO,QAAQ+uC,QAMzF,KAAK,GAJDhX,GAAQxmC,KAAKyoB,MAAM1gB,MAAM,MACzByK,GAAUtO,OAAOlE,KAAKyO,QAAQ8uC,UAAY,GAAK/W,EAAM7gC,OACrD4M,EAAQ,EAEH/M,EAAI,EAAGq7B,EAAO2F,EAAM7gC,OAAYk7B,EAAJr7B,EAAUA,IAC7C+M,EAAQrN,KAAKwH,IAAI6F,EAAOwU,EAAI0vC,YAAYjwB,EAAMhhC,IAAI+M,MAGpD,QAAQA,MAASA,EAAOC,OAAUA,EAAQgkD,UAAWhwB,EAAM7gC,QAG3D,OAAQ4M,MAAS,EAAGC,OAAU,EAAGgkD,UAAW,IAUhDjzD,EAAK4P,UAAU48C,OAAS,WACtB,MAAmBvpD,UAAfxG,KAAKuS,MACDvS,KAAK+R,EAAI/R,KAAKuS,MAAOvS,KAAKm2D,iBAAoBn2D,KAAK+jD,cAAchyC,GACjE/R,KAAK+R,EAAI/R,KAAKuS,MAAOvS,KAAKm2D,gBAAoBn2D,KAAKgkD,kBAAkBjyC,GACrE/R,KAAKgS,EAAIhS,KAAKwS,OAAOxS,KAAKm2D,iBAAoBn2D,KAAK+jD,cAAc/xC,GACjEhS,KAAKgS,EAAIhS,KAAKwS,OAAOxS,KAAKm2D,gBAAoBn2D,KAAKgkD,kBAAkBhyC,GAGpE,GAQXzO,EAAK4P,UAAU8sD,OAAS,WACtB,MAAQjgE,MAAK+R,GAAK/R,KAAK+jD,cAAchyC,GAC7B/R,KAAK+R,EAAI/R,KAAKgkD,kBAAkBjyC,GAChC/R,KAAKgS,GAAKhS,KAAK+jD,cAAc/xC,GAC7BhS
,KAAKgS,EAAIhS,KAAKgkD,kBAAkBhyC,GAW1CzO,EAAK4P,UAAU28C,eAAiB,SAAS7yC,EAAM8mC,EAAcC,GAC3DhkD,KAAKm2D,gBAAkB,EAAIl5C,EAC3Bjd,KAAKm8D,aAAel/C,EACpBjd,KAAK+jD,cAAgBA,EACrB/jD,KAAKgkD,kBAAoBA,GAS3BzgD,EAAK4P,UAAU6vB,SAAW,SAAS/lB,GACjCjd,KAAKm2D,gBAAkB,EAAIl5C,EAC3Bjd,KAAKm8D,aAAel/C,GAQtB1Z,EAAK4P,UAAU+sD,cAAgB,WAC7BlgE,KAAKy7D,GAAK,EACVz7D,KAAK07D,GAAK,GASZn4D,EAAK4P,UAAUgtD,eAAiB,SAASC,GACvC,GAAIC,GAAergE,KAAKy7D,GAAKz7D,KAAKy7D,GAAK2E,CAEvCpgE,MAAKy7D,GAAKv2D,KAAKyqB,KAAK0wC,EAAargE,KAAKyO,QAAQwuC,MAC9CojB,EAAergE,KAAK07D,GAAK17D,KAAK07D,GAAK0E,EAEnCpgE,KAAK07D,GAAKx2D,KAAKyqB,KAAK0wC,EAAargE,KAAKyO,QAAQwuC,OAGhDp9C,EAAOD,QAAU2D,GAKb,SAAS1D,GAWb,QAAS2D,GAAM+V,EAAWxH,EAAGC,EAAGuX,EAAMtc,GAElCjN,KAAKuZ,UADHA,EACeA,EAGAhI,SAASojB,KAIdnuB,SAAVyG,IACe,gBAAN8E,IACT9E,EAAQ8E,EACRA,EAAIvL,QACqB,gBAAT+iB,IAChBtc,EAAQsc,EACRA,EAAO/iB,QAGPyG,GACEqwC,UAAW,QACXC,SAAU,GACVC,SAAU,UACVryC,OACEiB,OAAQ,OACRD,WAAY,aAMpBnM,KAAK+R,EAAI,EACT/R,KAAKgS,EAAI,EACThS,KAAKgkB,QAAU,EAELxd,SAANuL,GAAyBvL,SAANwL,GACrBhS,KAAK6tD,YAAY97C,EAAGC,GAETxL,SAAT+iB,GACFvpB,KAAK8tD,QAAQvkC,GAIfvpB,KAAKsf,MAAQ/N,SAASM,cAAc,MACpC,IAAIyuD,GAAYtgE,KAAKsf,MAAMrS,KAC3BqzD,GAAU18C,SAAW,WACrB08C,EAAU9oC,WAAa,SACvB8oC,EAAUl0D,OAAS,aAAea,EAAM9B,MAAMiB,OAC9Ck0D,EAAUn1D,MAAQ8B,EAAMqwC,UACxBgjB,EAAU/iB,SAAWtwC,EAAMswC,SAAW,KACtC+iB,EAAUC,WAAatzD,EAAMuwC,SAC7B8iB,EAAUt8C,QAAUhkB,KAAKgkB,QAAU,KACnCs8C,EAAU3gD,gBAAkB1S,EAAM9B,MAAMgB,WACxCm0D,EAAUtwC,aAAe,MACzBswC,EAAUxuC,gBAAkB,MAC5BwuC,EAAUE,mBAAqB,MAC/BF,EAAUrwC,UAAY,wCACtBqwC,EAAUG,WAAa,SACvBzgE,KAAKuZ,UAAU9H,YAAYzR,KAAKsf,OAOlC9b,EAAM2P,UAAU06C,YAAc,SAAS97C,EAAGC,GACxChS,KAAK+R,EAAInH,SAASmH,GAClB/R,KAAKgS,EAAIpH,SAASoH,IAOpBxO,EAAM2P,UAAU26C,QAAU,SAASl+B,GAC7BA,YAAmB+c,UACrB3sC,KAAKsf,MAAM2E,UAAY,GACvBjkB,KAAKsf,MAAM7N,YAAYme,IAGvB5vB,KAAKsf,MAAM2E,UAAY2L,GAQ3BpsB,EAAM2P,UAAUw0B,KAAO,SAAUA,GAK/B,GAJanhC,SAATmhC,IACFA,GAAO,GAGLA,EAAM,CACR,GAAIn1B,GAASxS,KAAKsf,MAAMuF,aACpBtS,EAASvS,KAAKsf,MAAME,YACpBgV,EAAYx0B,KAAKsf,MAAMzV,WAAWgb,aAClC62B,EAAW17C,KAAKsf,MAAMzV,WAAW2V,YAEjC7X,EAAO3H,KAAKgS,EAAIQ,CAChB7K,GAAM6K,EAASxS,KAAKgkB,QAAUwQ,IAChC7sB,EAAM6sB,EAAYhiB,EAASxS,KAAKgkB,SAE9Brc,EAAM3H,KAAKgkB,UACbrc,EAAM3H,KAAKgkB,QAGb,IAAIvc,GAAOzH,KAAK+R,CACZtK,GAAO8K,EAAQvS,KAAKgkB,QAAU03B,IAChCj0C,EAAOi0C,EAAWnpC,EAAQvS,KAAKgkB,SAE7Bvc,EAAOzH,KAAKgkB,UACdvc,EAAOzH,KAAKgkB,SAGdhkB,KAAKsf,MAAMrS,MAAMxF,KAAOA,EAAO,KAC/BzH,KAAKsf,MAAMrS,MAAMtF,IAAMA,EAAM,KAC7B3H,KAAKsf,MAAMrS,MAAMuqB,WAAa,cAG9Bx3B,MAAK0nC,QAOTlkC,EAAM2P,UAAUu0B,KAAO,WACrB1nC,KAAKsf,MAAMrS,MAAMuqB,WAAa,UAGhC33B,EAAOD,QAAU4D,GAKb,SAAS3D,EAAQD,GAarB,QAAS8gE,GAAUhuD,GAEjB,MADAod,GAAMpd,EACCiuD,IAoCT,QAAS1+B,KACP95B,EAAQ,EACR1H,EAAIqvB,EAAI1K,OAAO,GAQjB,QAASiD,KACPlgB,IACA1H,EAAIqvB,EAAI1K,OAAOjd,GAOjB,QAASy4D,KACP,MAAO9wC,GAAI1K,OAAOjd,EAAQ,GAS5B,QAAS04D,GAAepgE,GACtB,MAAOqgE,GAAkB9yD,KAAKvN,GAShC,QAASsgE,GAAOx7D,EAAGa,GAKjB,GAJKb,IACHA,MAGEa,EACF,IAAK,GAAI6P,KAAQ7P,GACXA,EAAEN,eAAemQ,KACnB1Q,EAAE0Q,GAAQ7P,EAAE6P,GAIlB,OAAO1Q,GAeT,QAASqS,GAASmL,EAAKooB,EAAM9jC,GAG3B,IAFA,GAAI+F,GAAO+9B,EAAKpjC,MAAM,KAClBi5D,EAAIj+C,EACD3V,EAAKzH,QAAQ,CAClB,GAAI+C,GAAM0E,EAAKkE,OACXlE,GAAKzH,QAEFq7D,EAAEt4D,KACLs4D,EAAEt4D,OAEJs4D,EAAIA,EAAEt4D,IAINs4D,EAAEt4D,GAAOrB,GAWf,QAAS45D,GAAQhwC,EAAO60B,GAOtB,IANA,GAAItgD,GAAGC,EACHq0B,EAAU,KAGVonC,GAAUjwC,GACVvxB,EAAOuxB,EACJvxB,EAAKolC,QACVo8B,EAAOl5D,KAAKtI,EAAKolC,QACjBplC,EAAOA,EAAKolC,MAId,IAAIplC,EAAKs9C,MACP,IAAKx3C,EAAI,EAAGC,EAAM/F,EAAKs9C,MAAMr3C,OAAYF,EAAJD,EAASA,IAC5C,GAAIsgD,EAAKzlD,KAAOX,EAAKs9C,MAAMx3C,GAAGnF,GAAI,CAChCy5B,EAAUp6B,EAAKs9C,MAAMx3C,EACrB,OAiBN,IAZKs0B,IAEHA,GACEz5B,GAAIylD,EAAKzlD,IAEP4wB,EA
AM60B,OAERhsB,EAAQqnC,KAAOJ,EAAMjnC,EAAQqnC,KAAMlwC,EAAM60B,QAKxCtgD,EAAI07D,EAAOv7D,OAAS,EAAGH,GAAK,EAAGA,IAAK,CACvC,GAAIkF,GAAIw2D,EAAO17D,EAEVkF,GAAEsyC,QACLtyC,EAAEsyC,UAE4B,IAA5BtyC,EAAEsyC,MAAMr2C,QAAQmzB,IAClBpvB,EAAEsyC,MAAMh1C,KAAK8xB,GAKbgsB,EAAKqb,OACPrnC,EAAQqnC,KAAOJ,EAAMjnC,EAAQqnC,KAAMrb,EAAKqb,OAS5C,QAASC,GAAQnwC,EAAOy8B,GAKtB,GAJKz8B,EAAM6sB,QACT7sB,EAAM6sB,UAER7sB,EAAM6sB,MAAM91C,KAAK0lD,GACbz8B,EAAMy8B,KAAM,CACd,GAAIyT,GAAOJ,KAAU9vC,EAAMy8B,KAC3BA,GAAKyT,KAAOJ,EAAMI,EAAMzT,EAAKyT,OAajC,QAASE,GAAWpwC,EAAO7H,EAAMC,EAAIviB,EAAMq6D,GACzC,GAAIzT,IACFtkC,KAAMA,EACNC,GAAIA,EACJviB,KAAMA,EAQR,OALImqB,GAAMy8B,OACRA,EAAKyT,KAAOJ,KAAU9vC,EAAMy8B,OAE9BA,EAAKyT,KAAOJ,EAAMrT,EAAKyT,SAAYA,GAE5BzT,EAOT,QAAS4T,KAKP,IAJAC,EAAYC,EAAUC,KACtBC,EAAQ,GAGI,KAALjhE,GAAiB,KAALA,GAAkB,MAALA,GAAkB,MAALA,GAC3C4nB,GAGF,GAAG,CACD,GAAIs5C,IAAY,CAGhB,IAAS,KAALlhE,EAAU,CAGZ,IADA,GAAI+E,GAAI2C,EAAQ,EACQ,KAAjB2nB,EAAI1K,OAAO5f,IAA8B,KAAjBsqB,EAAI1K,OAAO5f,IACxCA,GAEF,IAAqB,MAAjBsqB,EAAI1K,OAAO5f,IAA+B,IAAjBsqB,EAAI1K,OAAO5f,GAAU,CAEhD,KAAY,IAAL/E,GAAgB,MAALA,GAChB4nB,GAEFs5C,IAAY,GAGhB,GAAS,KAALlhE,GAA6B,KAAjBmgE,IAAsB,CAEpC,KAAY,IAALngE,GAAgB,MAALA,GAChB4nB,GAEFs5C,IAAY,EAEd,GAAS,KAALlhE,GAA6B,KAAjBmgE,IAAsB,CAEpC,KAAY,IAALngE,GAAS,CACd,GAAS,KAALA,GAA6B,KAAjBmgE,IAAsB,CAEpCv4C,IACAA,GACA,OAGAA,IAGJs5C,GAAY,EAId,KAAY,KAALlhE,GAAiB,KAALA,GAAkB,MAALA,GAAkB,MAALA,GAC3C4nB,UAGGs5C,EAGP,IAAS,IAALlhE,EAGF,YADA8gE,EAAYC,EAAUI,UAKxB,IAAIC,GAAKphE,EAAImgE,GACb,IAAIkB,EAAWD,GAKb,MAJAN,GAAYC,EAAUI,UACtBF,EAAQG,EACRx5C,QACAA,IAKF,IAAIy5C,EAAWrhE,GAIb,MAHA8gE,GAAYC,EAAUI,UACtBF,EAAQjhE,MACR4nB,IAMF,IAAIw4C,EAAepgE,IAAW,KAALA,EAAU,CAIjC,IAHAihE,GAASjhE,EACT4nB,IAEOw4C,EAAepgE,IACpBihE,GAASjhE,EACT4nB,GAYF,OAVa,SAATq5C,EACFA,GAAQ,EAEQ,QAATA,EACPA,GAAQ,EAEAh9D,MAAMR,OAAOw9D,MACrBA,EAAQx9D,OAAOw9D,SAEjBH,EAAYC,EAAUO,YAKxB,GAAS,KAALthE,EAAU,CAEZ,IADA4nB,IACY,IAAL5nB,IAAiB,KAALA,GAAkB,KAALA,GAA6B,KAAjBmgE,MAC1Cc,GAASjhE,EACA,KAALA,GACF4nB,IAEFA,GAEF,IAAS,KAAL5nB,EACF,KAAMuhE,GAAe,2BAIvB,OAFA35C,UACAk5C,EAAYC,EAAUO,YAMxB,IADAR,EAAYC,EAAUS,QACV,IAALxhE,GACLihE,GAASjhE,EACT4nB,GAEF,MAAM,IAAI7O,aAAY,yBAA2B0oD,EAAKR,EAAO,IAAM,KAOrE,QAASf,KACP,GAAI1vC,KAwBJ,IAtBAgR,IACAq/B,IAGa,UAATI,IACFzwC,EAAMkxC,QAAS,EACfb,MAIW,SAATI,GAA6B,WAATA,KACtBzwC,EAAMnqB,KAAO46D,EACbJ,KAIEC,GAAaC,EAAUO,aACzB9wC,EAAM5wB,GAAKqhE,EACXJ,KAIW,KAATI,EACF,KAAMM,GAAe,2BAQvB,IANAV,IAGAc,EAAgBnxC,GAGH,KAATywC,EACF,KAAMM,GAAe,2BAKvB,IAHAV,IAGc,KAAVI,EACF,KAAMM,GAAe,uBASvB,OAPAV,WAGOrwC,GAAM60B,WACN70B,GAAMy8B,WACNz8B,GAAMA,MAENA,EAOT,QAASmxC,GAAiBnxC,GACxB,KAAiB,KAAVywC,GAAyB,KAATA,GACrBW,EAAepxC,GACF,KAATywC,GACFJ,IAWN,QAASe,GAAepxC,GAEtB,GAAIqxC,GAAWC,EAActxC,EAC7B,IAAIqxC,EAIF,WAFAE,GAAUvxC,EAAOqxC,EAMnB,IAAInB,GAAOsB,EAAwBxxC,EACnC,KAAIkwC,EAAJ,CAKA,GAAII,GAAaC,EAAUO,WACzB,KAAMC,GAAe,sBAEvB,IAAI3hE,GAAKqhE,CAGT,IAFAJ,IAEa,KAATI,EAAc,CAGhB,GADAJ,IACIC,GAAaC,EAAUO,WACzB,KAAMC,GAAe,sBAEvB/wC,GAAM5wB,GAAMqhE,EACZJ,QAIAoB,GAAmBzxC,EAAO5wB,IAS9B,QAASkiE,GAAetxC,GACtB,GAAIqxC,GAAW,IAgBf,IAba,YAATZ,IACFY,KACAA,EAASx7D,KAAO,WAChBw6D,IAGIC,GAAaC,EAAUO,aACzBO,EAASjiE,GAAKqhE,EACdJ,MAKS,KAATI,EAAc,CAehB,GAdAJ,IAEKgB,IACHA,MAEFA,EAASx9B,OAAS7T,EAClBqxC,EAASxc,KAAO70B,EAAM60B,KACtBwc,EAAS5U,KAAOz8B,EAAMy8B,KACtB4U,EAASrxC,MAAQA,EAAMA,MAGvBmxC,EAAgBE,GAGH,KAATZ,EACF,KAAMM,GAAe,2BAEvBV,WAGOgB,GAASxc,WACTwc,GAAS5U,WACT4U,GAASrxC,YACTqxC,GAASx9B,OAGX7T,EAAM0xC,YACT1xC,EAAM0xC,cAER1xC,EAAM0xC,UAAU36D,KAAKs6D,GAGvB,MAAOA,GAYT,QAASG,GAAyBxxC,GAEhC,MAAa,QAATywC,GACFJ,IAGArwC,EAAM60B,KAAO8c,IACN,QAES,QAATlB,GACPJ,IAGArwC,EAAMy8B,KAAOkV,IACN,QAES,SAATlB,GACPJ,IAGArwC,EAAMA,MAAQ2xC,IA
CP,SAGF,KAQT,QAASF,GAAmBzxC,EAAO5wB,GAEjC,GAAIylD,IACFzlD,GAAIA,GAEF8gE,EAAOyB,GACPzB,KACFrb,EAAKqb,KAAOA,GAEdF,EAAQhwC,EAAO60B,GAGf0c,EAAUvxC,EAAO5wB,GAQnB,QAASmiE,GAAUvxC,EAAO7H,GACxB,KAAgB,MAATs4C,GAA0B,MAATA,GAAe,CACrC,GAAIr4C,GACAviB,EAAO46D,CACXJ,IAEA,IAAIgB,GAAWC,EAActxC,EAC7B,IAAIqxC,EACFj5C,EAAKi5C,MAEF,CACH,GAAIf,GAAaC,EAAUO,WACzB,KAAMC,GAAe,kCAEvB34C,GAAKq4C,EACLT,EAAQhwC,GACN5wB,GAAIgpB,IAENi4C,IAIF,GAAIH,GAAOyB,IAGPlV,EAAO2T,EAAWpwC,EAAO7H,EAAMC,EAAIviB,EAAMq6D,EAC7CC,GAAQnwC,EAAOy8B,GAEftkC,EAAOC,GASX,QAASu5C,KAGP,IAFA,GAAIzB,GAAO,KAEK,KAATO,GAAc,CAGnB,IAFAJ,IACAH,KACiB,KAAVO,GAAyB,KAATA,GAAc,CACnC,GAAIH,GAAaC,EAAUO,WACzB,KAAMC,GAAe,0BAEvB,IAAI/rD,GAAOyrD,CAGX,IADAJ,IACa,KAATI,EACF,KAAMM,GAAe,wBAIvB,IAFAV,IAEIC,GAAaC,EAAUO,WACzB,KAAMC,GAAe,2BAEvB,IAAI36D,GAAQq6D,CACZ9pD,GAASupD,EAAMlrD,EAAM5O,GAErBi6D,IACY,KAARI,GACFJ,IAIJ,GAAa,KAATI,EACF,KAAMM,GAAe,qBAEvBV,KAGF,MAAOH,GAQT,QAASa,GAAea,GACtB,MAAO,IAAIrpD,aAAYqpD,EAAU,UAAYX,EAAKR,EAAO,IAAM,WAAav5D,EAAQ,KAStF,QAAS+5D,GAAM34C,EAAMu5C,GACnB,MAAQv5C,GAAK5jB,QAAUm9D,EAAav5C,EAAQA,EAAKje,OAAO,EAAG,IAAM,MASnE,QAASy3D,GAASC,EAAQC,EAAQ/pD,GAC5BjT,MAAMC,QAAQ88D,GAChBA,EAAO36D,QAAQ,SAAU66D,GACnBj9D,MAAMC,QAAQ+8D,GAChBA,EAAO56D,QAAQ,SAAU86D,GACvBjqD,EAAGgqD,EAAOC,KAIZjqD,EAAGgqD,EAAOD,KAKVh9D,MAAMC,QAAQ+8D,GAChBA,EAAO56D,QAAQ,SAAU86D,GACvBjqD,EAAG8pD,EAAQG,KAIbjqD,EAAG8pD,EAAQC,GAWjB,QAAS5b,GAAY30C,GAEnB,GAAI00C,GAAUsZ,EAAShuD,GACnB0wD,GACFpmB,SACAc,SACArvC,WAmBF,IAfI24C,EAAQpK,OACVoK,EAAQpK,MAAM30C,QAAQ,SAAUg7D,GAC9B,GAAIC,IACFjjE,GAAIgjE,EAAQhjE,GACZooB,MAAOrkB,OAAOi/D,EAAQ56C,OAAS46C,EAAQhjE,IAEzC0gE,GAAMuC,EAAWD,EAAQlC,MACrBmC,EAAUjmB,QACZimB,EAAUlmB,MAAQ,SAEpBgmB,EAAUpmB,MAAMh1C,KAAKs7D,KAKrBlc,EAAQtJ,MAAO,CAMjB,GAAIylB,GAAc,SAAUC,GAC1B,GAAIC,IACFr6C,KAAMo6C,EAAQp6C,KACdC,GAAIm6C,EAAQn6C,GAId,OAFA03C,GAAM0C,EAAWD,EAAQrC,MACzBsC,EAAUx2D,MAAyB,MAAhBu2D,EAAQ18D,KAAgB,QAAU,OAC9C28D,EAGTrc,GAAQtJ,MAAMz1C,QAAQ,SAAUm7D,GAC9B,GAAIp6C,GAAMC,CAERD,GADEo6C,EAAQp6C,eAAgB7iB,QACnBi9D,EAAQp6C,KAAK4zB,OAIlB38C,GAAImjE,EAAQp6C,MAKdC,EADEm6C,EAAQn6C,aAAc9iB,QACnBi9D,EAAQn6C,GAAG2zB,OAId38C,GAAImjE,EAAQn6C,IAIZm6C,EAAQp6C,eAAgB7iB,SAAUi9D,EAAQp6C,KAAK00B,OACjD0lB,EAAQp6C,KAAK00B,MAAMz1C,QAAQ,SAAUq7D,GACnC,GAAID,GAAYF,EAAYG,EAC5BN,GAAUtlB,MAAM91C,KAAKy7D,KAIzBV,EAAS35C,EAAMC,EAAI,SAAUD,EAAMC,GACjC,GAAIq6C,GAAUrC,EAAW+B,EAAWh6C,EAAK/oB,GAAIgpB,EAAGhpB,GAAImjE,EAAQ18D,KAAM08D,EAAQrC,MACtEsC,EAAYF,EAAYG,EAC5BN,GAAUtlB,MAAM91C,KAAKy7D,KAGnBD,EAAQn6C,aAAc9iB,SAAUi9D,EAAQn6C,GAAGy0B,OAC7C0lB,EAAQn6C,GAAGy0B,MAAMz1C,QAAQ,SAAUq7D,GACjC,GAAID,GAAYF,EAAYG,EAC5BN,GAAUtlB,MAAM91C,KAAKy7D,OAW7B,MAJIrc,GAAQ+Z,OACViC,EAAU30D,QAAU24C,EAAQ+Z,MAGvBiC,EAnyBT,GAAI5B,IACFC,KAAO,EACPG,UAAY,EACZG,WAAY,EACZE,QAAU,GAIRH,GACF6B,KAAK,EACLC,KAAK,EACLC,KAAK,EACLC,KAAK,EACLC,KAAK,EACLC,KAAK,EACLC,KAAK,EAELC,MAAM,EACNC,MAAM,GAGJr0C,EAAM,GACN3nB,EAAQ,EACR1H,EAAI,GACJihE,EAAQ,GACRH,EAAYC,EAAUC,KAmCtBX,EAAoB,iBA2uBxBlhE,GAAQ8gE,SAAWA,EACnB9gE,EAAQynD,WAAaA,GAKjB,SAASxnD,EAAQD,GAGrB,QAAS4nD,GAAW4c,EAAW31D,GAC7B,GAAIqvC,MACAd,IACJh9C,MAAKyO,SACHqvC,OACEQ,cAAc,GAEhBtB,OACEqnB,eAAe,EACfn5D,YAAY,IAIA1E,SAAZiI,IACFzO,KAAKyO,QAAQuuC,MAAqB,cAAIvuC,EAAQ41D,eAAgB,EAC9DrkE,KAAKyO,QAAQuuC,MAAkB,WAAOvuC,EAAQvD,YAAgB,EAC9DlL,KAAKyO,QAAQqvC,MAAoB,aAAKrvC,EAAQ6vC,cAAgB,EAKhE,KAAK,GAFDgmB,GAASF,EAAUtmB,MACnBymB,EAASH,EAAUpnB,MACdx3C,EAAI,EAAGA,EAAI8+D,EAAO3+D,OAAQH,IAAK,CACtC,GAAIkoD,MACA8W,EAAQF,EAAO9+D,EACnBkoD,GAAS,GAAI8W,EAAMnkE,GACnBqtD,EAAW,KAAI8W,EAAMC,OACrB/W,EAAS,GAAI8W,EAAM96D,OACnBgkD,EAAiB,WAAI8W,EAAMvpB,WAG3ByS,EAAY,MAAI8W,EAAMr5D,MACtBuiD,EAAmB,aAAsBlnD,SAAlBknD,EAAY,OAAkB,EAAQ1tD,KAAKyO,QA
AQ6vC,aAC1ER,EAAM91C,KAAK0lD,GAGb,IAAK,GAAIloD,GAAI,EAAGA,EAAI++D,EAAO5+D,OAAQH,IAAK,CACtC,GAAIsgD,MACA4e,EAAQH,EAAO/+D,EACnBsgD,GAAS,GAAI4e,EAAMrkE,GACnBylD,EAAiB,WAAI4e,EAAMzpB,WAC3B6K,EAAQ,EAAI4e,EAAM3yD,EAClB+zC,EAAQ,EAAI4e,EAAM1yD,EAClB8zC,EAAY,MAAI4e,EAAMj8C,MAEpBq9B,EAAY,MADuB,GAAjC9lD,KAAKyO,QAAQuuC,MAAM9xC,WACLw5D,EAAMv5D,MAGU3E,SAAhBk+D,EAAMv5D,OAAuBgB,WAAWu4D,EAAMv5D,MAAOiB,OAAOs4D,EAAMv5D,OAAS3E,OAE7Fs/C,EAAa,OAAI4e,EAAMryD,KACvByzC,EAAqB,eAAI9lD,KAAKyO,QAAQuuC,MAAMqnB,cAC5Cve,EAAqB,eAAI9lD,KAAKyO,QAAQuuC,MAAMqnB,cAC5CrnB,EAAMh1C,KAAK89C,GAGb,OAAQ9I,MAAMA,EAAOc,MAAMA,GAG7Bl+C,EAAQ4nD,WAAaA,GAIjB,SAAS3nD,EAAQD,EAASM,GAI9BL,EAAOD,QAA6B,mBAAX2J,SAA2BA,OAAe,QAAKrJ,EAAoB,KAKxF,SAASL,EAAQD,EAASM,GAI9B,GAAsB,mBAAXqJ,QAAwB,CACjC,GAAIo7D,GAAczkE,EAAoB,IAClC6D,EAASwF,OAAe,QAAKrJ,EAAoB,GACrDL,GAAOD,QAAU+kE,EAAY5gE,OAG7BlE,GAAOD,QAAU,WACf,KAAMgE,OAAM,+DAOZ,SAAS/D,EAAQD,EAASM,GAmB9B,QAASg2B,MAjBT,GAAInZ,GAAU7c,EAAoB,IAC9B6D,EAAS7D,EAAoB,IAC7BS,EAAOT,EAAoB,GAK3BolD,GAJUplD,EAAoB,GACnBA,EAAoB,GACvBA,EAAoB,IAClBA,EAAoB,IAClBA,EAAoB,KAChCyB,EAAWzB,EAAoB,GAYnC6c,GAAQmZ,EAAK/iB,WASb+iB,EAAK/iB,UAAUuhB,QAAU,SAAUnb,GAmHjC,QAASqrD,GAAat7D,GAChB6K,EAAGs1C,YACLt1C,EAAGyZ,KAAK,aAActkB,GApH1BtJ,KAAK+vB,OAEL/vB,KAAK+vB,IAAIrwB,KAAuB6R,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI5jB,WAAuBoF,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIgV,mBAAuBxzB,SAASM,cAAc,OACvD7R,KAAK+vB,IAAImY,qBAAuB32B,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI8H,gBAAuBtmB,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI80C,cAAuBtzD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI+0C,eAAuBvzD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI7D,OAAuB3a,SAASM,cAAc,OACvD7R,KAAK+vB,IAAItoB,KAAuB8J,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI1I,MAAuB9V,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIpoB,IAAuB4J,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIzM,OAAuB/R,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIg1C,UAAuBxzD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIi1C,aAAuBzzD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIk1C,cAAuB1zD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIm1C,iBAAuB3zD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIo1C,eAAuB5zD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIq1C,kBAAuB7zD,SAASM,cAAc,OAEvD7R,KAAK+vB,IAAIrwB,KAAKmI,UAA4B,oBAC1C7H,KAAK+vB,IAAI5jB,WAAWtE,UAAsB,sBAC1C7H,KAAK+vB,IAAIgV,mBAAmBl9B,UAAc,+BAC1C7H,KAAK+vB,IAAImY,qBAAqBrgC,UAAY,iCAC1C7H,KAAK+vB,IAAI8H,gBAAgBhwB,UAAiB,kBAC1C7H,KAAK+vB,IAAI80C,cAAch9D,UAAmB,gBAC1C7H,KAAK+vB,IAAI+0C,eAAej9D,UAAkB,iBAC1C7H,KAAK+vB,IAAIpoB,IAAIE,UAA6B,eAC1C7H,KAAK+vB,IAAIzM,OAAOzb,UAA0B,kBAC1C7H,KAAK+vB,IAAItoB,KAAKI,UAA4B,UAC1C7H,KAAK+vB,IAAI7D,OAAOrkB,UAA0B,UAC1C7H,KAAK+vB,IAAI1I,MAAMxf,UAA2B,UAC1C7H,KAAK+vB,IAAIg1C,UAAUl9D,UAAuB,aAC1C7H,KAAK+vB,IAAIi1C,aAAan9D,UAAoB,gBAC1C7H,KAAK+vB,IAAIk1C,cAAcp9D,UAAmB,aAC1C7H,KAAK+vB,IAAIm1C,iBAAiBr9D,UAAgB,gBAC1C7H,KAAK+vB,IAAIo1C,eAAet9D,UAAkB,aAC1C7H,KAAK+vB,IAAIq1C,kBAAkBv9D,UAAe,gBAE1C7H,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAI5jB,YACnCnM,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAIgV,oBACnC/kC,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAImY,sBACnCloC,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAI8H,iBACnC73B,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAI80C,eACnC7kE,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAI+0C,gBACnC9kE,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAIpoB,KACnC3H,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAIzM,QAEnCtjB,KAAK+vB,IAAI8H,gBAAgBpmB,YAAYzR,KAAK+vB,IAAI7D,QAC9ClsB,KAAK+vB,IAAI80C,cAAcpzD,YAAYzR,KAAK+vB,IAAItoB,MAC5CzH,KAAK+vB,IAAI+0C,eAAerzD,YAAYzR,KAAK+vB,IAAI1I,OAE7CrnB,KAAK+vB,IAAI8H,gBAAgBpmB,YAAYzR,KAAK+vB,IAAIg1C,WAC9C/kE,KAAK+vB,IAAI8H,gBAAgBpmB,YAAYzR,KAAK+vB,IAAIi1C,cAC9ChlE,KAAK+vB,IAAI80C,cAAcpzD,YAAYzR,KAAK+vB,IAAIk1C,eAC5CjlE,KAAK+vB,IAAI80C,cAAcpzD,YAAYzR,KAAK+vB,IAAIm1C,kBAC5CllE,KAAK+vB,IAAI+0C,eAAerzD,YAAYzR,KAAK+
vB,IAAIo1C,gBAC7CnlE,KAAK+vB,IAAI+0C,eAAerzD,YAAYzR,KAAK+vB,IAAIq1C,mBAE7CplE,KAAKuT,GAAG,cAAevT,KAAKyhB,OAAOqT,KAAK90B,MAExC,IAAImU,GAAKnU,IACTA,MAAKuT,GAAG,SAAU,SAAUo7C,GACtBA,GAAkC,GAApBA,EAAWv7C,MAEtBe,EAAGkxD,eACNlxD,EAAGkxD,aAAe/rD,WAAW,WAC3BnF,EAAGkxD,aAAe,KAClBlxD,EAAGsN,UACF,IAKLtN,EAAGsN,WAMPzhB,KAAK8D,OAAS,GAAIC,GAAO/D,KAAK+vB,IAAIrwB,MAAO4lE,YAAa,UACtDtlE,KAAK8D,OAAOoR,IAAI,SAASi0C,KAAK/lB,QAAQ,IACtCpjC,KAAKulE,YAEL,IAAIC,IACF,MAAO,YAAa,QACpB,QACA,MAAO,WAAY,UAAW,SAsDhC,IA/CAA,EAAOn9D,QAAQ,SAAUvB,GACvB,GAAIgC,GAAW,SAAUQ,GACnB6K,EAAGs1C,YACLt1C,EAAGyZ,KAAK9mB,EAAMwC,GAGlB6K,GAAGrQ,OAAOyP,GAAGzM,EAAMgC,GACnBqL,EAAGoxD,UAAUz+D,GAAQgC,IAIvB9I,KAAK8D,OAAOyP,GAAG,eAAgB,SAAUjK,GACnCA,EAAMqnC,SACJx8B,EAAGs1C,YACLt1C,EAAGyZ,KAAK,QAAStkB,IAGrBwrB,KAAK90B,OAOPA,KAAK+vB,IAAIrwB,KAAKiJ,iBAAiB,aAAci8D,GAC7C5kE,KAAK+vB,IAAIrwB,KAAKiJ,iBAAiB,iBAAkBi8D,GAGjD5kE,KAAKgG,OACHtG,QACAyM,cACA0rB,mBACAgtC,iBACAC,kBACA54C,UACAzkB,QACA4f,SACA1f,OACA2b,UACAlX,UACAi7B,UAAW,EACXo+B,aAAc,GAGhBzlE,KAAK0lE,YAAc,GAGdnsD,EAAW,KAAM,IAAI3V,OAAM,wBAChC2V,GAAU9H,YAAYzR,KAAK+vB,IAAIrwB,OA4BjCw2B,EAAK/iB,UAAUD,WAAa,SAAUzE,GACpC,GAAIA,EAAS,CAEX,GAAIP,IAAU,QAAS,SAAU,YAAa,YAAa,aAAc,QAAS,MAAO,cAAe,aAAc,iBAAkB,cACxIvN,GAAKoF,gBAAgBmI,EAAQlO,KAAKyO,QAASA,GAEvC,eAAiBzO,MAAKyO,SACxB9M,EAAS81B,qBAAqBz3B,KAAK20B,KAAM30B,KAAKyO,QAAQsmB,aAGpD,cAAgBtmB,KACdA,EAAQ45C,WACLroD,KAAKsoD,YACRtoD,KAAKsoD,UAAY,GAAIhD,GAAUtlD,KAAK+vB,IAAIrwB,OAItCM,KAAKsoD,YACPtoD,KAAKsoD,UAAUh1C,gBACRtT,MAAKsoD,YAMlBtoD,KAAK2lE,kBASP,GALA3lE,KAAKgC,WAAWqG,QAAQ,SAAUu9D,GAChCA,EAAU1yD,WAAWzE,KAInBA,GAAWA,EAAQgH,MACrB,KAAM,IAAI7R,OAAM,wEAIlB5D,MAAKyhB,UAOPyU,EAAK/iB,UAAUs2C,SAAW,WACxB,OAAQzpD,KAAKsoD,WAAatoD,KAAKsoD,UAAUgL,QAM3Cp9B,EAAK/iB,UAAUG,QAAU,WAEvBtT,KAAKyW,QAGLzW,KAAK0T,MAGL1T,KAAK6lE,kBAGD7lE,KAAK+vB,IAAIrwB,KAAKmK,YAChB7J,KAAK+vB,IAAIrwB,KAAKmK,WAAWsH,YAAYnR,KAAK+vB,IAAIrwB,MAEhDM,KAAK+vB,IAAM,KAGP/vB,KAAKsoD,YACPtoD,KAAKsoD,UAAUh1C,gBACRtT,MAAKsoD,UAId,KAAK,GAAIh/C,KAAStJ,MAAKulE,UACjBvlE,KAAKulE,UAAUz/D,eAAewD,UACzBtJ,MAAKulE,UAAUj8D,EAG1BtJ,MAAKulE,UAAY,KACjBvlE,KAAK8D,OAAS,KAGd9D,KAAKgC,WAAWqG,QAAQ,SAAUu9D,GAChCA,EAAUtyD,YAGZtT,KAAK20B,KAAO,MAQduB,EAAK/iB,UAAUsyB,cAAgB,SAAUtL,GACvC,IAAKn6B,KAAK41B,WACR,KAAM,IAAIhyB,OAAM,yDAGlB5D,MAAK41B,WAAW6P,cAActL,IAOhCjE,EAAK/iB,UAAUuyB,cAAgB,WAC7B,IAAK1lC,KAAK41B,WACR,KAAM,IAAIhyB,OAAM,yDAGlB,OAAO5D,MAAK41B,WAAW8P,iBAQzBxP,EAAK/iB,UAAUi+B,gBAAkB,WAC/B,MAAOpxC,MAAK61B,SAAW71B,KAAK61B,QAAQub,uBAetClb,EAAK/iB,UAAUsD,MAAQ,SAASqvD,KAEzBA,GAAQA,EAAK7jE,QAChBjC,KAAKi2B,SAAS,QAIX6vC,GAAQA,EAAK3xC,SAChBn0B,KAAKg2B,UAAU,QAIZ8vC,GAAQA,EAAKr3D,WAChBzO,KAAKgC,WAAWqG,QAAQ,SAAUu9D,GAChCA,EAAU1yD,WAAW0yD,EAAUvxC,kBAGjCr0B,KAAKkT,WAAWlT,KAAKq0B,kBAazB6B,EAAK/iB,UAAUsjB,IAAM,SAAShoB,GAC5B,GAAIgnB,GAAQz1B,KAAKs2B,eAGjB,IAAoB,OAAhBb,EAAM7lB,OAAgC,OAAd6lB,EAAM5lB,IAAlC,CAIA,GAAI2mB,GAAW/nB,GAA+BjI,SAApBiI,EAAQ+nB,QAAyB/nB,EAAQ+nB,SAAU,CAC7Ex2B,MAAKy1B,MAAMlC,SAASkC,EAAM7lB,MAAO6lB,EAAM5lB,IAAK2mB,KAQ9CN,EAAK/iB,UAAUmjB,cAAgB,WAE7B,GAAID,GAAYr2B,KAAK+2B,eAGjBnnB,EAAQymB,EAAUvqB,IAClB+D,EAAMwmB,EAAU3pB,GACpB,IAAa,MAATkD,GAAwB,MAAPC,EAAa,CAChC,GAAI2iB,GAAY3iB,EAAI7I,UAAY4I,EAAM5I,SACtB,IAAZwrB,IAEFA,EAAW,OAEb5iB,EAAQ,GAAItL,MAAKsL,EAAM5I,UAAuB,IAAXwrB,GACnC3iB,EAAM,GAAIvL,MAAKuL,EAAI7I,UAAuB,IAAXwrB,GAGjC,OACE5iB,MAAOA,EACPC,IAAKA,IAuBTqmB,EAAK/iB,UAAUojB,UAAY,SAAS3mB,EAAOC,EAAKpB,GAC9C,GAAI+nB,GAAW/nB,GAA+BjI,SAApBiI,EAAQ+nB,QAAyB/nB,EAAQ+nB,SAAU,CAC7E,IAAwB,GAApB9wB,UAAUC,OAAa,CACzB,GAAI8vB,GAAQ/vB,UAAU,EACtB1F,MAAKy1B,MAAMlC,SAASkC,EAAM7lB,MAAO6lB,EAAM5lB,IAAK2mB,OAG5Cx2B,MAAKy1B,MAAMlC,SAAS3jB,EAAOC,EAAK2mB,IAcpCN,EAAK/iB,UAA
U0U,OAAS,SAASsS,EAAM1rB,GACrC,GAAI+jB,GAAWxyB,KAAKy1B,MAAM5lB,IAAM7P,KAAKy1B,MAAM7lB,MACvC9B,EAAInN,EAAKkG,QAAQszB,EAAM,QAAQnzB,UAE/B4I,EAAQ9B,EAAI0kB,EAAW,EACvB3iB,EAAM/B,EAAI0kB,EAAW,EACrBgE,EAAW/nB,GAA+BjI,SAApBiI,EAAQ+nB,QAAyB/nB,EAAQ+nB,SAAU,CAE7Ex2B,MAAKy1B,MAAMlC,SAAS3jB,EAAOC,EAAK2mB,IAOlCN,EAAK/iB,UAAU4yD,UAAY,WACzB,GAAItwC,GAAQz1B,KAAKy1B,MAAM2J,UACvB,QACExvB,MAAO,GAAItL,MAAKmxB,EAAM7lB,OACtBC,IAAK,GAAIvL,MAAKmxB,EAAM5lB,OAQxBqmB,EAAK/iB,UAAUsO,OAAS,WACtB,GAAIgjB,IAAU,EACVh2B,EAAUzO,KAAKyO,QACfzI,EAAQhG,KAAKgG,MACb+pB,EAAM/vB,KAAK+vB,GAEf,IAAKA,EAAL,CAEApuB,EAASi2B,kBAAkB53B,KAAK20B,KAAM30B,KAAKyO,QAAQsmB,aAGxB,OAAvBtmB,EAAQ8lB,aACV5zB,EAAKiH,aAAamoB,EAAIrwB,KAAM,OAC5BiB,EAAKuH,gBAAgB6nB,EAAIrwB,KAAM,YAG/BiB,EAAKuH,gBAAgB6nB,EAAIrwB,KAAM,OAC/BiB,EAAKiH,aAAamoB,EAAIrwB,KAAM,WAI9BqwB,EAAIrwB,KAAKuN,MAAMunB,UAAY7zB,EAAKmJ,OAAOK,OAAOsE,EAAQ+lB,UAAW,IACjEzE,EAAIrwB,KAAKuN,MAAMwnB,UAAY9zB,EAAKmJ,OAAOK,OAAOsE,EAAQgmB,UAAW,IACjE1E,EAAIrwB,KAAKuN,MAAMsF,MAAQ5R,EAAKmJ,OAAOK,OAAOsE,EAAQ8D,MAAO,IAGzDvM,EAAMoG,OAAO3E,MAAUsoB,EAAI8H,gBAAgBzH,YAAcL,EAAI8H,gBAAgBrY,aAAe,EAC5FxZ,EAAMoG,OAAOib,MAASrhB,EAAMoG,OAAO3E,KACnCzB,EAAMoG,OAAOzE,KAAUooB,EAAI8H,gBAAgBvH,aAAeP,EAAI8H,gBAAgBhT,cAAgB,EAC9F7e,EAAMoG,OAAOkX,OAAStd,EAAMoG,OAAOzE,GACnC,IAAIq+D,GAAkBj2C,EAAIrwB,KAAK4wB,aAAeP,EAAIrwB,KAAKmlB,aACnDohD,EAAkBl2C,EAAIrwB,KAAK0wB,YAAcL,EAAIrwB,KAAK8f,WAIb,KAArCuQ,EAAI8H,gBAAgBhT,eACtB7e,EAAMoG,OAAO3E,KAAOzB,EAAMoG,OAAOzE,IACjC3B,EAAMoG,OAAOib,MAASrhB,EAAMoG,OAAO3E,MAEP,IAA1BsoB,EAAIrwB,KAAKmlB,eACXohD,EAAkBD,GAKpBhgE,EAAMkmB,OAAO1Z,OAASud,EAAI7D,OAAOoE,aACjCtqB,EAAMyB,KAAK+K,OAAWud,EAAItoB,KAAK6oB,aAC/BtqB,EAAMqhB,MAAM7U,OAAUud,EAAI1I,MAAMiJ,aAChCtqB,EAAM2B,IAAI6K,OAAYud,EAAIpoB,IAAIkd,eAAoB7e,EAAMoG,OAAOzE,IAC/D3B,EAAMsd,OAAO9Q,OAASud,EAAIzM,OAAOuB,eAAiB7e,EAAMoG,OAAOkX,MAM/D,IAAI+M,GAAgBnrB,KAAKwH,IAAI1G,EAAMyB,KAAK+K,OAAQxM,EAAMkmB,OAAO1Z,OAAQxM,EAAMqhB,MAAM7U,QAC7E0zD,EAAalgE,EAAM2B,IAAI6K,OAAS6d,EAAgBrqB,EAAMsd,OAAO9Q,OAC/DwzD,EAAmBhgE,EAAMoG,OAAOzE,IAAM3B,EAAMoG,OAAOkX,MACrDyM,GAAIrwB,KAAKuN,MAAMuF,OAAS7R,EAAKmJ,OAAOK,OAAOsE,EAAQ+D,OAAQ0zD,EAAa,MAGxElgE,EAAMtG,KAAK8S,OAASud,EAAIrwB,KAAK4wB,aAC7BtqB,EAAMmG,WAAWqG,OAASxM,EAAMtG,KAAK8S,OAASwzD,CAC9C,IAAI3qC,GAAkBr1B,EAAMtG,KAAK8S,OAASxM,EAAM2B,IAAI6K,OAASxM,EAAMsd,OAAO9Q,OACxEwzD,CACFhgE,GAAM6xB,gBAAgBrlB,OAAU6oB,EAChCr1B,EAAM6+D,cAAcryD,OAAY6oB,EAChCr1B,EAAM8+D,eAAetyD,OAAWxM,EAAM6+D,cAAcryD,OAGpDxM,EAAMtG,KAAK6S,MAAQwd,EAAIrwB,KAAK0wB,YAC5BpqB,EAAMmG,WAAWoG,MAAQvM,EAAMtG,KAAK6S,MAAQ0zD,EAC5CjgE,EAAMyB,KAAK8K,MAAQwd,EAAI80C,cAAcrlD,cAAkBxZ,EAAMoG,OAAO3E,KACpEzB,EAAM6+D,cAActyD,MAAQvM,EAAMyB,KAAK8K,MACvCvM,EAAMqhB,MAAM9U,MAAQwd,EAAI+0C,eAAetlD,cAAgBxZ,EAAMoG,OAAOib,MACpErhB,EAAM8+D,eAAevyD,MAAQvM,EAAMqhB,MAAM9U,KACzC,IAAI4zD,GAAcngE,EAAMtG,KAAK6S,MAAQvM,EAAMyB,KAAK8K,MAAQvM,EAAMqhB,MAAM9U,MAAQ0zD,CAC5EjgE,GAAMkmB,OAAO3Z,MAAiB4zD,EAC9BngE,EAAM6xB,gBAAgBtlB,MAAQ4zD,EAC9BngE,EAAM2B,IAAI4K,MAAoB4zD,EAC9BngE,EAAMsd,OAAO/Q,MAAiB4zD,EAG9Bp2C,EAAI5jB,WAAWc,MAAMuF,OAAmBxM,EAAMmG,WAAWqG,OAAS,KAClEud,EAAIgV,mBAAmB93B,MAAMuF,OAAWxM,EAAMmG,WAAWqG,OAAS,KAClEud,EAAImY,qBAAqBj7B,MAAMuF,OAASxM,EAAM6xB,gBAAgBrlB,OAAS,KACvEud,EAAI8H,gBAAgB5qB,MAAMuF,OAAcxM,EAAM6xB,gBAAgBrlB,OAAS,KACvEud,EAAI80C,cAAc53D,MAAMuF,OAAgBxM,EAAM6+D,cAAcryD,OAAS,KACrEud,EAAI+0C,eAAe73D,MAAMuF,OAAexM,EAAM8+D,eAAetyD,OAAS,KAEtEud,EAAI5jB,WAAWc,MAAMsF,MAAmBvM,EAAMmG,WAAWoG,MAAQ,KACjEwd,EAAIgV,mBAAmB93B,MAAMsF,MAAWvM,EAAM6xB,gBAAgBtlB,MAAQ,KACtEwd,EAAImY,qBAAqBj7B,MAAMsF,MAASvM,EAAMmG,WAAWoG,MAAQ,KACjEwd,EAAI8H,gBAAgB5qB,MAAMsF,MAAcvM,EAAMkmB,OAAO3Z,MAAQ,KAC7Dwd,EAAIpoB,IAAIsF,MAAMsF,MAA0BvM,EAAM2B,
IAAI4K,MAAQ,KAC1Dwd,EAAIzM,OAAOrW,MAAMsF,MAAuBvM,EAAMsd,OAAO/Q,MAAQ,KAG7Dwd,EAAI5jB,WAAWc,MAAMxF,KAAiB,IACtCsoB,EAAI5jB,WAAWc,MAAMtF,IAAiB,IACtCooB,EAAIgV,mBAAmB93B,MAAMxF,KAAUzB,EAAMyB,KAAK8K,MAAQvM,EAAMoG,OAAO3E,KAAQ,KAC/EsoB,EAAIgV,mBAAmB93B,MAAMtF,IAAS,IACtCooB,EAAImY,qBAAqBj7B,MAAMxF,KAAO,IACtCsoB,EAAImY,qBAAqBj7B,MAAMtF,IAAO3B,EAAM2B,IAAI6K,OAAS,KACzDud,EAAI8H,gBAAgB5qB,MAAMxF,KAAYzB,EAAMyB,KAAK8K,MAAQ,KACzDwd,EAAI8H,gBAAgB5qB,MAAMtF,IAAY3B,EAAM2B,IAAI6K,OAAS,KACzDud,EAAI80C,cAAc53D,MAAMxF,KAAc,IACtCsoB,EAAI80C,cAAc53D,MAAMtF,IAAc3B,EAAM2B,IAAI6K,OAAS,KACzDud,EAAI+0C,eAAe73D,MAAMxF,KAAczB,EAAMyB,KAAK8K,MAAQvM,EAAMkmB,OAAO3Z,MAAS,KAChFwd,EAAI+0C,eAAe73D,MAAMtF,IAAa3B,EAAM2B,IAAI6K,OAAS,KACzDud,EAAIpoB,IAAIsF,MAAMxF,KAAwBzB,EAAMyB,KAAK8K,MAAQ,KACzDwd,EAAIpoB,IAAIsF,MAAMtF,IAAwB,IACtCooB,EAAIzM,OAAOrW,MAAMxF,KAAqBzB,EAAMyB,KAAK8K,MAAQ,KACzDwd,EAAIzM,OAAOrW,MAAMtF,IAAsB3B,EAAM2B,IAAI6K,OAASxM,EAAM6xB,gBAAgBrlB,OAAU,KAI1FxS,KAAKomE,kBAGL,IAAIz8C,GAAS3pB,KAAKgG,MAAMqhC,SACG,WAAvB54B,EAAQ8lB,cACV5K,GAAUzkB,KAAKwH,IAAI1M,KAAKgG,MAAM6xB,gBAAgBrlB,OAASxS,KAAKgG,MAAMkmB,OAAO1Z,OACvExS,KAAKgG,MAAMoG,OAAOzE,IAAM3H,KAAKgG,MAAMoG,OAAOkX,OAAQ,IAEtDyM,EAAI7D,OAAOjf,MAAMxF,KAAO,IACxBsoB,EAAI7D,OAAOjf,MAAMtF,IAAOgiB,EAAS,KACjCoG,EAAItoB,KAAKwF,MAAMxF,KAAS,IACxBsoB,EAAItoB,KAAKwF,MAAMtF,IAASgiB,EAAS,KACjCoG,EAAI1I,MAAMpa,MAAMxF,KAAQ,IACxBsoB,EAAI1I,MAAMpa,MAAMtF,IAAQgiB,EAAS,IAGjC,IAAI08C,GAAwC,GAAxBrmE,KAAKgG,MAAMqhC,UAAiB,SAAW,GACvDi/B,EAAmBtmE,KAAKgG,MAAMqhC,WAAarnC,KAAKgG,MAAMy/D,aAAe,SAAW,EAYpF,IAXA11C,EAAIg1C,UAAU93D,MAAMuqB,WAAsB6uC,EAC1Ct2C,EAAIi1C,aAAa/3D,MAAMuqB,WAAmB8uC,EAC1Cv2C,EAAIk1C,cAAch4D,MAAMuqB,WAAkB6uC,EAC1Ct2C,EAAIm1C,iBAAiBj4D,MAAMuqB,WAAe8uC,EAC1Cv2C,EAAIo1C,eAAel4D,MAAMuqB,WAAiB6uC,EAC1Ct2C,EAAIq1C,kBAAkBn4D,MAAMuqB,WAAc8uC,EAG1CtmE,KAAKgC,WAAWqG,QAAQ,SAAUu9D,GAChCnhC,EAAUmhC,EAAUnkD,UAAYgjB,IAE9BA,EAAS,CAEX,GAAI8hC,GAAc,CACdvmE,MAAK0lE,YAAca,GACrBvmE,KAAK0lE,cACL1lE,KAAKyhB,UAGLkX,QAAQhF,IAAI,qCAEd3zB,KAAK0lE,YAAc,EAGrB1lE,KAAK4tB,KAAK,oBAIZsI,EAAK/iB,UAAUqzD,QAAU,WACvB,KAAM,IAAI5iE,OAAM,wDAUlBsyB,EAAK/iB,UAAUiyB,eAAiB,SAASjL,GACvC,IAAKn6B,KAAK21B,YACR,KAAM,IAAI/xB,OAAM,sCAGlB5D,MAAK21B,YAAYyP,eAAejL,IAQlCjE,EAAK/iB,UAAUkyB,eAAiB,WAC9B,IAAKrlC,KAAK21B,YACR,KAAM,IAAI/xB,OAAM,sCAGlB,OAAO5D,MAAK21B,YAAY0P,kBAU1BnP,EAAK/iB,UAAUmiB,QAAU,SAASvjB,GAChC,MAAOpQ,GAAS0zB,OAAOr1B,KAAM+R,EAAG/R,KAAKgG,MAAMkmB,OAAO3Z,QAUpD2jB,EAAK/iB,UAAUqiB,cAAgB,SAASzjB,GACtC,MAAOpQ,GAAS0zB,OAAOr1B,KAAM+R,EAAG/R,KAAKgG,MAAMtG,KAAK6S,QAalD2jB,EAAK/iB,UAAU+hB,UAAY,SAASiF,GAClC,MAAOx4B,GAASszB,SAASj1B,KAAMm6B,EAAMn6B,KAAKgG,MAAMkmB,OAAO3Z,QAczD2jB,EAAK/iB,UAAUiiB,gBAAkB,SAAS+E,GACxC,MAAOx4B,GAASszB,SAASj1B,KAAMm6B,EAAMn6B,KAAKgG,MAAMtG,KAAK6S,QAUvD2jB,EAAK/iB,UAAUwyD,gBAAkB,WACA,GAA3B3lE,KAAKyO,QAAQ6lB,WACft0B,KAAKymE,mBAGLzmE,KAAK6lE,mBAST3vC,EAAK/iB,UAAUszD,iBAAmB,WAChC,GAAItyD,GAAKnU,IAETA,MAAK6lE,kBAEL7lE,KAAK0mE,UAAY,WACf,MAA6B,IAAzBvyD,EAAG1F,QAAQ6lB,eAEbngB,GAAG0xD,uBAID1xD,EAAG4b,IAAIrwB,OAKJyU,EAAG4b,IAAIrwB,KAAK0wB,aAAejc,EAAGnO,MAAM2rC,WACtCx9B,EAAG4b,IAAIrwB,KAAK4wB,cAAgBnc,EAAGnO,MAAM2gE,cACtCxyD,EAAGnO,MAAM2rC,UAAYx9B,EAAG4b,IAAIrwB,KAAK0wB,YACjCjc,EAAGnO,MAAM2gE,WAAaxyD,EAAG4b,IAAIrwB,KAAK4wB,aAElCnc,EAAGyZ,KAAK,aAMdjtB,EAAKgI,iBAAiBY,OAAQ,SAAUvJ,KAAK0mE,WAE7C1mE,KAAK4mE,WAAaC,YAAY7mE,KAAK0mE,UAAW,MAOhDxwC,EAAK/iB,UAAU0yD,gBAAkB,WAC3B7lE,KAAK4mE,aACPn0C,cAAczyB,KAAK4mE,YACnB5mE,KAAK4mE,WAAapgE,QAIpB7F,EAAKwI,oBAAoBI,OAAQ,SAAUvJ,KAAK0mE,WAChD1mE,KAAK0mE,UAAY,MASnBxwC,EAAK/iB,UAAU2zD,cAAgB,SAAUz/B,GAGvC,MAFArnC,MAAKgG,MAAMqhC,UAAYA,EACvBrnC,KAAKomE,mBACEpmE,KAAKgG,MAAMqhC,WAQpBnR,EAAK/iB,UAAUizD,iB
AAmB,WAEhC,GAAIX,GAAevgE,KAAK4G,IAAI9L,KAAKgG,MAAM6xB,gBAAgBrlB,OAASxS,KAAKgG,MAAMkmB,OAAO1Z,OAAQ,EAc1F,OAbIizD,IAAgBzlE,KAAKgG,MAAMy/D,eAGG,UAA5BzlE,KAAKyO,QAAQ8lB,cACfv0B,KAAKgG,MAAMqhC,WAAco+B,EAAezlE,KAAKgG,MAAMy/D,cAErDzlE,KAAKgG,MAAMy/D,aAAeA,GAIxBzlE,KAAKgG,MAAMqhC,UAAY,IAAGrnC,KAAKgG,MAAMqhC,UAAY,GACjDrnC,KAAKgG,MAAMqhC,UAAYo+B,IAAczlE,KAAKgG,MAAMqhC,UAAYo+B,GAEzDzlE,KAAKgG,MAAMqhC,WAQpBnR,EAAK/iB,UAAU4zD,cAAgB,WAC7B,MAAO/mE,MAAKgG,MAAMqhC,WAGpBxnC,EAAOD,QAAUs2B,GAKb,SAASr2B,EAAQD,EAASM,GAEjBA,EAAoB,GAOjCN,GAAQ0pD,QAAU,SAAUxlD,EAAQwE,GAClCA,EAAS0+D,aAAe,SAAU19D,GAC5BA,EAAMqnC,SACRroC,EAASgB,IAIbxF,EAAOyP,GAAG,eAAgBjL,EAAS0+D,eAQrCpnE,EAAQqnE,UAAY,SAAUnjE,EAAQwE,GAOpC,MANAA,GAAS0+D,aAAe,SAAU19D,GAC5BA,EAAM49D,SACR5+D,EAASgB,IAINxF,EAAOyP,GAAG,eAAgBjL,EAAS0+D,eAQ5CpnE,EAAQunE,SAAW,SAAUrjE,EAAQwE,GACnCxE,EAAO4P,IAAI,eAAgBpL,EAAS0+D,eAQtCpnE,EAAQwnE,WAAaxnE,EAAQunE,UAKzB,SAAStnE,EAAQD,GAGrBA,EAAY,IACVk6B,QAAS,UACTK,KAAM,QAERv6B,EAAe,MAAIA,EAAY,GAC/BA,EAAe,MAAIA,EAAY,GAG/BA,EAAY,IACVynE,OAAQ,aACRltC,KAAM,QAERv6B,EAAe,MAAIA,EAAY,GAC/BA,EAAe,MAAIA,EAAY,IAK3B,SAASC,EAAQD,GAGrBA,EAAY,IACVg9C,KAAM,OACNG,IAAK,kBACLuqB,KAAM,OACNrG,QAAS,WACTG,QAAS,WACTmG,SAAU,YACV1qB,SAAU,YACV2qB,eAAgB,+CAChBC,gBAAiB,qEACjBC,oBAAqB,wEACrBC,gBAAiB,kCACjBC,mBAAoB,+BAEtBhoE,EAAe,MAAIA,EAAY,GAC/BA,EAAe,MAAIA,EAAY,GAG/BA,EAAY,IACVg9C,KAAM,WACNG,IAAK,uBACLuqB,KAAM,QACNrG,QAAS,iBACTG,QAAS,iBACTmG,SAAU,gBACV1qB,SAAU,gBACV2qB,eAAgB,uDAChBC,gBAAiB,6EACjBC,oBAAqB,kFACrBC,gBAAiB,wCACjBC,mBAAoB,2CAEtBhoE,EAAe,MAAIA,EAAY,GAC/BA,EAAe,MAAIA,EAAY,IAK3B,WAKoC,mBAA7BioE,4BAKTA,yBAAyB10D,UAAUisD,OAAS,SAASrtD,EAAGC,EAAGvH,GACzDzK,KAAK4nB,YACL5nB,KAAK0rB,IAAI3Z,EAAGC,EAAGvH,EAAG,EAAG,EAAEvF,KAAKymB,IAAI,IASlCk8C,yBAAyB10D,UAAU20D,OAAS,SAAS/1D,EAAGC,EAAGvH,GACzDzK,KAAK4nB,YACL5nB,KAAKyS,KAAKV,EAAItH,EAAGuH,EAAIvH,EAAO,EAAJA,EAAW,EAAJA,IASjCo9D,yBAAyB10D,UAAU4b,SAAW,SAAShd,EAAGC,EAAGvH,GAE3DzK,KAAK4nB,WAEL,IAAIhc,GAAQ,EAAJnB,EACJs9D,EAAKn8D,EAAI,EACTo8D,EAAK9iE,KAAKyqB,KAAK,GAAK,EAAI/jB,EACxBD,EAAIzG,KAAKyqB,KAAK/jB,EAAIA,EAAIm8D,EAAKA,EAE/B/nE,MAAK6nB,OAAO9V,EAAGC,GAAKrG,EAAIq8D,IACxBhoE,KAAK8nB,OAAO/V,EAAIg2D,EAAI/1D,EAAIg2D,GACxBhoE,KAAK8nB,OAAO/V,EAAIg2D,EAAI/1D,EAAIg2D,GACxBhoE,KAAK8nB,OAAO/V,EAAGC,GAAKrG,EAAIq8D,IACxBhoE,KAAKioB,aASP4/C,yBAAyB10D,UAAU80D,aAAe,SAASl2D,EAAGC,EAAGvH,GAE/DzK,KAAK4nB,WAEL,IAAIhc,GAAQ,EAAJnB,EACJs9D,EAAKn8D,EAAI,EACTo8D,EAAK9iE,KAAKyqB,KAAK,GAAK,EAAI/jB,EACxBD,EAAIzG,KAAKyqB,KAAK/jB,EAAIA,EAAIm8D,EAAKA,EAE/B/nE,MAAK6nB,OAAO9V,EAAGC,GAAKrG,EAAIq8D,IACxBhoE,KAAK8nB,OAAO/V,EAAIg2D,EAAI/1D,EAAIg2D,GACxBhoE,KAAK8nB,OAAO/V,EAAIg2D,EAAI/1D,EAAIg2D,GACxBhoE,KAAK8nB,OAAO/V,EAAGC,GAAKrG,EAAIq8D,IACxBhoE,KAAKioB,aASP4/C,yBAAyB10D,UAAU+0D,KAAO,SAASn2D,EAAGC,EAAGvH,GAEvDzK,KAAK4nB,WAEL,KAAK,GAAIugD,GAAI,EAAO,GAAJA,EAAQA,IAAK,CAC3B,GAAI18C,GAAU08C,EAAI,IAAM,EAAS,IAAJ19D,EAAc,GAAJA,CACvCzK,MAAK8nB,OACD/V,EAAI0Z,EAASvmB,KAAKkZ,IAAQ,EAAJ+pD,EAAQjjE,KAAKymB,GAAK,IACxC3Z,EAAIyZ,EAASvmB,KAAKqZ,IAAQ,EAAJ4pD,EAAQjjE,KAAKymB,GAAK,KAI9C3rB,KAAKioB,aAMP4/C,yBAAyB10D,UAAUssD,UAAY,SAAS1tD,EAAGC,EAAGq9C,EAAG1jD,EAAGlB,GAClE,GAAI29D,GAAMljE,KAAKymB,GAAG,GACE,GAAhB0jC,EAAM,EAAI5kD,IAAYA,EAAM4kD,EAAI,GAChB,EAAhB1jD,EAAM,EAAIlB,IAAYA,EAAMkB,EAAI,GACpC3L,KAAK4nB,YACL5nB,KAAK6nB,OAAO9V,EAAEtH,EAAEuH,GAChBhS,KAAK8nB,OAAO/V,EAAEs9C,EAAE5kD,EAAEuH,GAClBhS,KAAK0rB,IAAI3Z,EAAEs9C,EAAE5kD,EAAEuH,EAAEvH,EAAEA,EAAM,IAAJ29D,EAAY,IAAJA,GAAQ,GACrCpoE,KAAK8nB,OAAO/V,EAAEs9C,EAAEr9C,EAAErG,EAAElB,GACpBzK,KAAK0rB,IAAI3Z,EAAEs9C,EAAE5kD,EAAEuH,EAAErG,EAAElB,EAAEA,EAAE,EAAM,GAAJ29D,GAAO,GAChCpoE,KAAK8nB,OAAO/V,EAAEtH,EAAEuH,EAAErG,GAClB3L,KAAK0rB,IAAI
3Z,EAAEtH,EAAEuH,EAAErG,EAAElB,EAAEA,EAAM,GAAJ29D,EAAW,IAAJA,GAAQ,GACpCpoE,KAAK8nB,OAAO/V,EAAEC,EAAEvH,GAChBzK,KAAK0rB,IAAI3Z,EAAEtH,EAAEuH,EAAEvH,EAAEA,EAAM,IAAJ29D,EAAY,IAAJA,GAAQ,IAMrCP,yBAAyB10D,UAAUysD,QAAU,SAAS7tD,EAAGC,EAAGq9C,EAAG1jD,GAC7D,GAAI08D,GAAQ,SACRC,EAAMjZ,EAAI,EAAKgZ,EACfE,EAAM58D,EAAI,EAAK08D,EACfG,EAAKz2D,EAAIs9C,EACToZ,EAAKz2D,EAAIrG,EACT+8D,EAAK32D,EAAIs9C,EAAI,EACbsZ,EAAK32D,EAAIrG,EAAI,CAEjB3L;KAAK4nB,YACL5nB,KAAK6nB,OAAO9V,EAAG42D,GACf3oE,KAAK4oE,cAAc72D,EAAG42D,EAAKJ,EAAIG,EAAKJ,EAAIt2D,EAAG02D,EAAI12D,GAC/ChS,KAAK4oE,cAAcF,EAAKJ,EAAIt2D,EAAGw2D,EAAIG,EAAKJ,EAAIC,EAAIG,GAChD3oE,KAAK4oE,cAAcJ,EAAIG,EAAKJ,EAAIG,EAAKJ,EAAIG,EAAIC,EAAID,GACjDzoE,KAAK4oE,cAAcF,EAAKJ,EAAIG,EAAI12D,EAAG42D,EAAKJ,EAAIx2D,EAAG42D,IAQjDd,yBAAyB10D,UAAUusD,SAAW,SAAS3tD,EAAGC,EAAGq9C,EAAG1jD,GAC9D,GAAIiC,GAAI,EAAE,EACNi7D,EAAWxZ,EACXyZ,EAAWn9D,EAAIiC,EAEfy6D,EAAQ,SACRC,EAAMO,EAAW,EAAKR,EACtBE,EAAMO,EAAW,EAAKT,EACtBG,EAAKz2D,EAAI82D,EACTJ,EAAKz2D,EAAI82D,EACTJ,EAAK32D,EAAI82D,EAAW,EACpBF,EAAK32D,EAAI82D,EAAW,EACpBC,EAAM/2D,GAAKrG,EAAIm9D,EAAS,GACxBE,EAAMh3D,EAAIrG,CAEd3L,MAAK4nB,YACL5nB,KAAK6nB,OAAO2gD,EAAIG,GAEhB3oE,KAAK4oE,cAAcJ,EAAIG,EAAKJ,EAAIG,EAAKJ,EAAIG,EAAIC,EAAID,GACjDzoE,KAAK4oE,cAAcF,EAAKJ,EAAIG,EAAI12D,EAAG42D,EAAKJ,EAAIx2D,EAAG42D,GAE/C3oE,KAAK4oE,cAAc72D,EAAG42D,EAAKJ,EAAIG,EAAKJ,EAAIt2D,EAAG02D,EAAI12D,GAC/ChS,KAAK4oE,cAAcF,EAAKJ,EAAIt2D,EAAGw2D,EAAIG,EAAKJ,EAAIC,EAAIG,GAEhD3oE,KAAK8nB,OAAO0gD,EAAIO,GAEhB/oE,KAAK4oE,cAAcJ,EAAIO,EAAMR,EAAIG,EAAKJ,EAAIU,EAAKN,EAAIM,GACnDhpE,KAAK4oE,cAAcF,EAAKJ,EAAIU,EAAKj3D,EAAGg3D,EAAMR,EAAIx2D,EAAGg3D,GAEjD/oE,KAAK8nB,OAAO/V,EAAG42D,IAOjBd,yBAAyB10D,UAAUukD,MAAQ,SAAS3lD,EAAGC,EAAGo8C,EAAOzoD,GAE/D,GAAIsjE,GAAKl3D,EAAIpM,EAAST,KAAKqZ,IAAI6vC,GAC3B8a,EAAKl3D,EAAIrM,EAAST,KAAKkZ,IAAIgwC,GAI3B+a,EAAKp3D,EAAa,GAATpM,EAAeT,KAAKqZ,IAAI6vC,GACjCgb,EAAKp3D,EAAa,GAATrM,EAAeT,KAAKkZ,IAAIgwC,GAGjCib,EAAKJ,EAAKtjE,EAAS,EAAIT,KAAKqZ,IAAI6vC,EAAQ,GAAMlpD,KAAKymB,IACnD29C,EAAKJ,EAAKvjE,EAAS,EAAIT,KAAKkZ,IAAIgwC,EAAQ,GAAMlpD,KAAKymB,IAGnD49C,EAAKN,EAAKtjE,EAAS,EAAIT,KAAKqZ,IAAI6vC,EAAQ,GAAMlpD,KAAKymB,IACnD69C,EAAKN,EAAKvjE,EAAS,EAAIT,KAAKkZ,IAAIgwC,EAAQ,GAAMlpD,KAAKymB,GAEvD3rB,MAAK4nB,YACL5nB,KAAK6nB,OAAO9V,EAAGC,GACfhS,KAAK8nB,OAAOuhD,EAAIC,GAChBtpE,KAAK8nB,OAAOqhD,EAAIC,GAChBppE,KAAK8nB,OAAOyhD,EAAIC,GAChBxpE,KAAKioB,aASP4/C,yBAAyB10D,UAAUqkD,WAAa,SAASzlD,EAAEC,EAAEwmD,EAAGC,EAAGgR,GAC5DA,IAAWA,GAAW,GAAG,IACd,GAAZC,IAAeA,EAAa,KAChC,IAAIC,GAAYF,EAAU9jE,MAC1B3F,MAAK6nB,OAAO9V,EAAGC,EAKf,KAJA,GAAI4M,GAAM45C,EAAGzmD,EAAI8M,EAAM45C,EAAGzmD,EACtB43D,EAAQ/qD,EAAGD,EACXirD,EAAgB3kE,KAAKyqB,KAAM/Q,EAAGA,EAAKC,EAAGA,GACtCirD,EAAU,EAAG/9B,GAAK,EACf89B,GAAe,IAAI,CACxB,GAAIH,GAAaD,EAAUK,IAAYH,EACnCD,GAAaG,IAAeH,EAAaG,EAC7C,IAAInuD,GAAQxW,KAAKyqB,KAAM+5C,EAAWA,GAAc,EAAIE,EAAMA,GACnD,GAAHhrD,IAAMlD,GAASA,GACnB3J,GAAK2J,EACL1J,GAAK43D,EAAMluD,EACX1b,KAAK+rC,EAAO,SAAW,UAAUh6B,EAAEC,GACnC63D,GAAiBH,EACjB39B,GAAQA,MAUV,SAASlsC,EAAQD,EAASM,GAQ9B,QAASyqC,GAAKrT,EAAS7oB,GACrBzO,KAAKs3B,QAAUA,EACft3B,KAAKyO,QAAUA,EALjB,GAAI7N,GAAUV,EAAoB,GAC9B2qC,EAAS3qC,EAAoB,GAOjCyqC,GAAKx3B,UAAU04B,UAAY,SAASC,GAGlC,IAAK,GAFDlwB,GAAOkwB,EAAU,GAAG95B,EACpB8J,EAAOgwB,EAAU,GAAG95B,EACf4Z,EAAI,EAAGA,EAAIkgB,EAAUnmC,OAAQimB,IACpChQ,EAAOA,EAAOkwB,EAAUlgB,GAAG5Z,EAAI85B,EAAUlgB,GAAG5Z,EAAI4J,EAChDE,EAAOA,EAAOgwB,EAAUlgB,GAAG5Z,EAAI85B,EAAUlgB,GAAG5Z,EAAI8J,CAElD,QAAQhQ,IAAK8P,EAAMlP,IAAKoP,EAAM8vB,iBAAkB5rC,KAAKyO,QAAQm9B,mBAU/DjB,EAAKx3B,UAAU44B,KAAO,SAAU/U,EAAS/kB,EAAO+5B,GAC9C,GAAe,MAAXhV,GACEA,EAAQrxB,OAAS,EAAG,CACtB,GAAIwlC,GAAMx+B,EACNosC,EAAY70C,OAAO8nC,EAAUpG,IAAI34B,MAAMuF,OAAOhI,QAAQ,KAAK,IAgB
/D,IAfA2gC,EAAOvqC,EAAQwQ,cAAc,OAAQ46B,EAAU/E,YAAa+E,EAAUpG,KACtEuF,EAAK/4B,eAAe,KAAM,QAASH,EAAMpK,WACtBrB,SAAhByL,EAAMhF,OACPk+B,EAAK/4B,eAAe,KAAM,QAASH,EAAMhF,OAKzCN,EADsC,GAApCsF,EAAMxD,QAAQs8B,WAAWr8B,QACvBi8B,EAAKo/B,YAAY/yC,EAAS/kB,GAG1B04B,EAAKq/B,QAAQhzC,GAIiB,GAAhC/kB,EAAMxD,QAAQ88B,OAAO78B,QAAiB,CACxC,GACIu7D,GADA7+B,EAAWxqC,EAAQwQ,cAAc,OAAQ46B,EAAU/E,YAAa+E,EAAUpG,IAG5EqkC,GADsC,OAApCh4D,EAAMxD,QAAQ88B,OAAOhX,YACf,IAAMyC,EAAQ,GAAGjlB,EAAI,MAAgBpF,EAAI,IAAMqqB,EAAQA,EAAQrxB,OAAS,GAAGoM,EAAI,KAG/E,IAAMilB,EAAQ,GAAGjlB,EAAI,IAAMgnC,EAAY,IAAMpsC,EAAI,IAAMqqB,EAAQA,EAAQrxB,OAAS,GAAGoM,EAAI,IAAMgnC,EAEvG3N,EAASh5B,eAAe,KAAM,QAASH,EAAMpK,UAAY,SACvBrB,SAA/ByL,EAAMxD,QAAQ88B,OAAOt+B,OACtBm+B,EAASh5B,eAAe,KAAM,QAASH,EAAMxD,QAAQ88B,OAAOt+B,OAE9Dm+B,EAASh5B,eAAe,KAAM,IAAK63D,GAGrC9+B,EAAK/4B,eAAe,KAAM,IAAK,IAAMzF,GAGG,GAApCsF,EAAMxD,QAAQ0D,WAAWzD,SAC3Bm8B,EAAOkB,KAAK/U,EAAS/kB,EAAO+5B,KAepCrB,EAAKu/B,mBAAqB,SAASx3D,GAMjC,IAAK,GAJDy3D,GAAIC,EAAIC,EAAIC,EAAIC,EAAKC,EACrB79D,EAAIzH,KAAKwoB,MAAMhb,EAAK,GAAGX,GAAK,IAAM7M,KAAKwoB,MAAMhb,EAAK,GAAGV,GAAK,IAC1Dy4D,EAAgB,EAAE,EAClB9kE,EAAS+M,EAAK/M,OACTH,EAAI,EAAOG,EAAS,EAAbH,EAAgBA,IAE9B2kE,EAAW,GAAL3kE,EAAUkN,EAAK,GAAKA,EAAKlN,EAAE,GACjC4kE,EAAK13D,EAAKlN,GACV6kE,EAAK33D,EAAKlN,EAAE,GACZ8kE,EAAc3kE,EAARH,EAAI,EAAckN,EAAKlN,EAAE,GAAK6kE,EAUpCE,GAAQx4D,IAAMo4D,EAAGp4D,EAAI,EAAEq4D,EAAGr4D,EAAIs4D,EAAGt4D,GAAI04D,EAAgBz4D,IAAMm4D,EAAGn4D,EAAI,EAAEo4D,EAAGp4D,EAAIq4D,EAAGr4D,GAAIy4D,GAClFD,GAAQz4D,GAAMq4D,EAAGr4D,EAAI,EAAEs4D,EAAGt4D,EAAIu4D,EAAGv4D,GAAI04D,EAAgBz4D,GAAMo4D,EAAGp4D,EAAI,EAAEq4D,EAAGr4D,EAAIs4D,EAAGt4D,GAAIy4D,GAGlF99D,GAAK,IACL49D,EAAIx4D,EAAI,IACRw4D,EAAIv4D,EAAI,IACRw4D,EAAIz4D,EAAI,IACRy4D,EAAIx4D,EAAI,IACRq4D,EAAGt4D,EAAI,IACPs4D,EAAGr4D,EAAI,GAGT,OAAOrF,IAcTg+B,EAAKo/B,YAAc,SAASr3D,EAAMT,GAChC,GAAIg5B,GAAQh5B,EAAMxD,QAAQs8B,WAAWE,KACrC,IAAa,GAATA,GAAwBzkC,SAAVykC,EAChB,MAAOjrC,MAAKkqE,mBAAmBx3D,EAO/B,KAAK,GAJDy3D,GAAIC,EAAIC,EAAIC,EAAIC,EAAKC,EAAKE,EAAGC,EAAGC,EAAIC,EAAGpgD,EAAGqgD,EAAGC,EAC7CC,EAAQC,EAAQC,EAASC,EAASC,EAASC,EAC3C1+D,EAAIzH,KAAKwoB,MAAMhb,EAAK,GAAGX,GAAK,IAAM7M,KAAKwoB,MAAMhb,EAAK,GAAGV,GAAK,IAC1DrM,EAAS+M,EAAK/M,OACTH,EAAI,EAAOG,EAAS,EAAbH,EAAgBA,IAE9B2kE,EAAW,GAAL3kE,EAAUkN,EAAK,GAAKA,EAAKlN,EAAE,GACjC4kE,EAAK13D,EAAKlN,GACV6kE,EAAK33D,EAAKlN,EAAE,GACZ8kE,EAAc3kE,EAARH,EAAI,EAAckN,EAAKlN,EAAE,GAAK6kE,EAEpCK,EAAKxlE,KAAKyqB,KAAKzqB,KAAK4uB,IAAIq2C,EAAGp4D,EAAIq4D,EAAGr4D,EAAE,GAAK7M,KAAK4uB,IAAIq2C,EAAGn4D,EAAIo4D,EAAGp4D,EAAE,IAC9D24D,EAAKzlE,KAAKyqB,KAAKzqB,KAAK4uB,IAAIs2C,EAAGr4D,EAAIs4D,EAAGt4D,EAAE,GAAK7M,KAAK4uB,IAAIs2C,EAAGp4D,EAAIq4D,EAAGr4D,EAAE,IAC9D44D,EAAK1lE,KAAKyqB,KAAKzqB,KAAK4uB,IAAIu2C,EAAGt4D,EAAIu4D,EAAGv4D,EAAE,GAAK7M,KAAK4uB,IAAIu2C,EAAGr4D,EAAIs4D,EAAGt4D,EAAE,IAY9Dg5D,EAAU9lE,KAAK4uB,IAAI82C,EAAK3/B,GACxBigC,EAAUhmE,KAAK4uB,IAAI82C,EAAG,EAAE3/B,GACxBggC,EAAU/lE,KAAK4uB,IAAI62C,EAAK1/B,GACxBkgC,EAAUjmE,KAAK4uB,IAAI62C,EAAG,EAAE1/B,GACxBogC,EAAUnmE,KAAK4uB,IAAI42C,EAAKz/B,GACxBmgC,EAAUlmE,KAAK4uB,IAAI42C,EAAG,EAAEz/B,GAExB4/B,EAAI,EAAEO,EAAU,EAAEC,EAASJ,EAASE,EACpC1gD,EAAI,EAAEygD,EAAU,EAAEF,EAASC,EAASE,EACpCL,EAAI,EAAEO,GAAUA,EAASJ,GACrBH,EAAI,IAAIA,EAAI,EAAIA,GACpBC,EAAI,EAAEC,GAAUA,EAASC,GACrBF,EAAI,IAAIA,EAAI,EAAIA,GAEpBR,GAAQx4D,IAAMo5D,EAAUhB,EAAGp4D,EAAI84D,EAAET,EAAGr4D,EAAIq5D,EAAUf,EAAGt4D,GAAK+4D,EACxD94D,IAAMm5D,EAAUhB,EAAGn4D,EAAI64D,EAAET,EAAGp4D,EAAIo5D,EAAUf,EAAGr4D,GAAK84D,GAEpDN,GAAQz4D,GAAMm5D,EAAUd,EAAGr4D,EAAI0Y,EAAE4/C,EAAGt4D,EAAIo5D,EAAUb,EAAGv4D,GAAKg5D,EACxD/4D,GAAMk5D,EAAUd,EAAGp4D,EAAIyY,EAAE4/C,EAAGr4D,EAAIm5D,EAAUb,EAAGt4D,GAAK+4D,GAEvC,GAATR,EAAIx4D,GAAmB,GAATw4D,
EAAIv4D,IAASu4D,EAAMH,GACxB,GAATI,EAAIz4D,GAAmB,GAATy4D,EAAIx4D,IAASw4D,EAAMH,GACrC19D,GAAK,IACL49D,EAAIx4D,EAAI,IACRw4D,EAAIv4D,EAAI,IACRw4D,EAAIz4D,EAAI,IACRy4D,EAAIx4D,EAAI,IACRq4D,EAAGt4D,EAAI,IACPs4D,EAAGr4D,EAAI,GAGT,OAAOrF,IAUXg+B,EAAKq/B,QAAU,SAASt3D,GAGtB,IAAK,GADD/F,GAAI,GACCnH,EAAI,EAAGA,EAAIkN,EAAK/M,OAAQH,IAE7BmH,GADO,GAALnH,EACGkN,EAAKlN,GAAGuM,EAAI,IAAMW,EAAKlN,GAAGwM,EAG1B,IAAMU,EAAKlN,GAAGuM,EAAI,IAAMW,EAAKlN,GAAGwM,CAGzC,OAAOrF,IAGT9M,EAAOD,QAAU+qC,GAKb,SAAS9qC,EAAQD,EAASM,GAQ9B,QAASorE,GAASh0C,EAAS7oB,GACzBzO,KAAKs3B,QAAUA,EACft3B,KAAKyO,QAAUA,EALjB,CAAA,GAAI7N,GAAUV,EAAoB,EACrBA,GAAoB,IAOjCorE,EAASn4D,UAAU04B,UAAY,SAASC,GACtC,GAA2C,SAAvC9rC,KAAKyO,QAAQymC,SAASC,cAA0B,CAGlD,IAAK,GAFDv5B,GAAOkwB,EAAU,GAAG95B,EACpB8J,EAAOgwB,EAAU,GAAG95B,EACf4Z,EAAI,EAAGA,EAAIkgB,EAAUnmC,OAAQimB,IACpChQ,EAAOA,EAAOkwB,EAAUlgB,GAAG5Z,EAAI85B,EAAUlgB,GAAG5Z,EAAI4J,EAChDE,EAAOA,EAAOgwB,EAAUlgB,GAAG5Z,EAAI85B,EAAUlgB,GAAG5Z,EAAI8J,CAElD,QAAQhQ,IAAK8P,EAAMlP,IAAKoP,EAAM8vB,iBAAkB5rC,KAAKyO,QAAQm9B,kBAI7D,IAAK,GADD2/B,MACK3/C,EAAI,EAAGA,EAAIkgB,EAAUnmC,OAAQimB,IACpC2/C,EAAgBvjE,MACd+J,EAAG+5B,EAAUlgB,GAAG7Z,EAChBC,EAAG85B,EAAUlgB,GAAG5Z,EAChBslB,QAASt3B,KAAKs3B,SAGlB,OAAOi0C,IAYXD,EAASv/B,KAAO,SAAUmE,EAAUsG,EAAoBxK,GACtD,GAEIw/B,GACA9iE,EAAK+iE,EACLx5D,EACAzM,EAAEomB,EALF8/C,KACAC,KAKAC,EAAY,CAGhB,KAAKpmE,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAE/B,GADAyM,EAAQ+5B,EAAU7X,OAAO+b,EAAS1qC,IACP,OAAvByM,EAAMxD,QAAQxB,OACK,GAAjBgF,EAAMyW,UAAyEliB,SAArDwlC,EAAUv9B,QAAQ0lB,OAAOqD,WAAW0Y,EAAS1qC,KAAyE,GAApDwmC,EAAUv9B,QAAQ0lB,OAAOqD,WAAW0Y,EAAS1qC,KAC3I,IAAKomB,EAAI,EAAGA,EAAI4qB,EAAmBtG,EAAS1qC,IAAIG,OAAQimB,IACtD8/C,EAAa1jE,MACX+J,EAAGykC,EAAmBtG,EAAS1qC,IAAIomB,GAAG7Z,EACtCC,EAAGwkC,EAAmBtG,EAAS1qC,IAAIomB,GAAG5Z,EACtCslB,QAAS4Y,EAAS1qC,KAEpBomE,GAAa,CAMrB,IAAiB,GAAbA,EAeJ,IAZAF,EAAax1D,KAAK,SAAU3Q,EAAGa,GAC7B,MAAIb,GAAEwM,GAAK3L,EAAE2L,EACJxM,EAAE+xB,QAAUlxB,EAAEkxB,QAEd/xB,EAAEwM,EAAI3L,EAAE2L,IAKnBu5D,EAASO,sBAAsBF,EAAeD,GAGzClmE,EAAI,EAAGA,EAAIkmE,EAAa/lE,OAAQH,IAAK,CACxCyM,EAAQ+5B,EAAU7X,OAAOu3C,EAAalmE,GAAG8xB,QACzC,IAAIqP,GAAW,GAAM10B,EAAMxD,QAAQymC,SAAS3iC,KAE5C7J,GAAMgjE,EAAalmE,GAAGuM,CACtB,IAAI+5D,GAAe,CACnB,IAA2BtlE,SAAvBmlE,EAAcjjE,GACZlD,EAAE,EAAIkmE,EAAa/lE,SAAS6lE,EAAetmE,KAAK2lB,IAAI6gD,EAAalmE,EAAE,GAAGuM,EAAIrJ,IAC1ElD,EAAI,IAAwBgmE,EAAetmE,KAAK4G,IAAI0/D,EAAatmE,KAAK2lB,IAAI6gD,EAAalmE,EAAE,GAAGuM,EAAIrJ,KACpG+iE,EAAWH,EAASS,iBAAiBP,EAAcv5D,EAAO00B,OAEvD,CACH,GAAIqlC,GAAUxmE,GAAKmmE,EAAcjjE,GAAKujE,OAASN,EAAcjjE,GAAKwjE,UAC9DC,EAAU3mE,GAAKmmE,EAAcjjE,GAAKwjE,SAAW,EAC7CF,GAAUN,EAAa/lE,SAAS6lE,EAAetmE,KAAK2lB,IAAI6gD,EAAaM,GAASj6D,EAAIrJ,IAClFyjE,EAAU,IAAsBX,EAAetmE,KAAK4G,IAAI0/D,EAAatmE,KAAK2lB,IAAI6gD,EAAaS,GAASp6D,EAAIrJ,KAC5G+iE,EAAWH,EAASS,iBAAiBP,EAAcv5D,EAAO00B,GAC1DglC,EAAcjjE,GAAKwjE,UAAY,EAEa,SAAxCj6D,EAAMxD,QAAQymC,SAASC,eACzB22B,EAAeH,EAAcjjE,GAAK0jE,YAClCT,EAAcjjE,GAAK0jE,aAAen6D,EAAMy4B,aAAeghC,EAAalmE,GAAGwM,GAExB,cAAxCC,EAAMxD,QAAQymC,SAASC,gBAC9Bs2B,EAASl5D,MAAQk5D,EAASl5D,MAAQo5D,EAAcjjE,GAAKujE,OACrDR,EAAS9hD,QAAWgiD,EAAcjjE,GAAa,SAAI+iE,EAASl5D,MAAS,GAAIk5D,EAASl5D,OAASo5D,EAAcjjE,GAAKujE,OAAO,GACjF,QAAhCh6D,EAAMxD,QAAQymC,SAASnG,MAAwB08B,EAAS9hD,QAAU,GAAI8hD,EAASl5D,MAC1C,SAAhCN,EAAMxD,QAAQymC,SAASnG,QAAmB08B,EAAS9hD,QAAU,GAAI8hD,EAASl5D,QAGvF3R,EAAQ0R,QAAQo5D,EAAalmE,GAAGuM,EAAI05D,EAAS9hD,OAAQ+hD,EAAalmE,GAAGwM,EAAI85D,EAAcL,EAASl5D,MAAON,EAAMy4B,aAAeghC,EAAalmE,GAAGwM,EAAGC,EAAMpK,UAAY,OAAQmkC,EAAU/E,YAAa+E,EAAUpG,KAElK,GAApC3zB,EAAMxD,QAAQ0D,WAAWzD,SAC3B9N,EAAQkR,UAAU45D,EAAalmE,GAAGuM,EAAI05D,EAAS9hD,OAAQ+hD,EAAalmE,GAAGwM,EAAGC,EAAO+5B,EAAU/E,YAAa+E,EAAUpG,OAYxH0l
C,EAASO,sBAAwB,SAAUF,EAAeD,GAGxD,IAAK,GADDF,GACKhmE,EAAI,EAAGA,EAAIkmE,EAAa/lE,OAAQH,IACnCA,EAAI,EAAIkmE,EAAa/lE,SACvB6lE,EAAetmE,KAAK2lB,IAAI6gD,EAAalmE,EAAI,GAAGuM,EAAI25D,EAAalmE,GAAGuM,IAE9DvM,EAAI,IACNgmE,EAAetmE,KAAK4G,IAAI0/D,EAActmE,KAAK2lB,IAAI6gD,EAAalmE,EAAI,GAAGuM,EAAI25D,EAAalmE,GAAGuM,KAErE,GAAhBy5D,IACuChlE,SAArCmlE,EAAcD,EAAalmE,GAAGuM,KAChC45D,EAAcD,EAAalmE,GAAGuM,IAAMk6D,OAAQ,EAAGC,SAAU,EAAGE,YAAa,IAE3ET,EAAcD,EAAalmE,GAAGuM,GAAGk6D,QAAU,IAejDX,EAASS,iBAAmB,SAAUP,EAAcv5D,EAAO00B,GACzD,GAAIp0B,GAAOoX,CAwBX,OAvBI6hD,GAAev5D,EAAMxD,QAAQymC,SAAS3iC,OAASi5D,EAAe,GAChEj5D,EAAuBo0B,EAAf6kC,EAA0B7kC,EAAW6kC,EAE7C7hD,EAAS,EAC2B,QAAhC1X,EAAMxD,QAAQymC,SAASnG,MACzBplB,GAAU,GAAM6hD,EAEuB,SAAhCv5D,EAAMxD,QAAQymC,SAASnG,QAC9BplB,GAAU,GAAM6hD,KAKlBj5D,EAAQN,EAAMxD,QAAQymC,SAAS3iC,MAC/BoX,EAAS,EAC2B,QAAhC1X,EAAMxD,QAAQymC,SAASnG,MACzBplB,GAAU,GAAM1X,EAAMxD,QAAQymC,SAAS3iC,MAEA,SAAhCN,EAAMxD,QAAQymC,SAASnG,QAC9BplB,GAAU,GAAM1X,EAAMxD,QAAQymC,SAAS3iC,SAInCA,MAAOA,EAAOoX,OAAQA,IAGhC2hD,EAASxzB,oBAAsB,SAASyzB,EAAiB90B,EAAavG,EAAUm8B,EAAY93C,GAC1F,GAAIg3C,EAAgB5lE,OAAS,EAAG,CAE9B4lE,EAAgBr1D,KAAK,SAAU3Q,EAAGa,GAChC,MAAIb,GAAEwM,GAAK3L,EAAE2L,EACJxM,EAAE+xB,QAAUlxB,EAAEkxB,QAEd/xB,EAAEwM,EAAI3L,EAAE2L,GAGnB,IAAI45D,KAEJL,GAASO,sBAAsBF,EAAeJ,GAC9C90B,EAAY41B,GAAcf,EAASgB,qBAAqBX,EAAeJ,GACvE90B,EAAY41B,GAAYzgC,iBAAmBrX,EAC3C2b,EAASloC,KAAKqkE,KAIlBf,EAASgB,qBAAuB,SAAUX,EAAeD,GAIvD,IAAK,GAHDhjE,GACAkT,EAAO8vD,EAAa,GAAG15D,EACvB8J,EAAO4vD,EAAa,GAAG15D,EAClBxM,EAAI,EAAGA,EAAIkmE,EAAa/lE,OAAQH,IACvCkD,EAAMgjE,EAAalmE,GAAGuM,EACKvL,SAAvBmlE,EAAcjjE,IAChBkT,EAAOA,EAAO8vD,EAAalmE,GAAGwM,EAAI05D,EAAalmE,GAAGwM,EAAI4J,EACtDE,EAAOA,EAAO4vD,EAAalmE,GAAGwM,EAAI05D,EAAalmE,GAAGwM,EAAI8J,GAGtD6vD,EAAcjjE,GAAK0jE,aAAeV,EAAalmE,GAAGwM,CAGtD,KAAK,GAAIu6D,KAAQZ,GACXA,EAAc7lE,eAAeymE,KAC/B3wD,EAAOA,EAAO+vD,EAAcY,GAAMH,YAAcT,EAAcY,GAAMH,YAAcxwD,EAClFE,EAAOA,EAAO6vD,EAAcY,GAAMH,YAAcT,EAAcY,GAAMH,YAActwD,EAItF,QAAQhQ,IAAK8P,EAAMlP,IAAKoP,IAG1Bjc,EAAOD,QAAU0rE,GAIb,SAASzrE,EAAQD,EAASM,GAO9B,QAAS2qC,GAAOvT,EAAS7oB,GACvBzO,KAAKs3B,QAAUA,EACft3B,KAAKyO,QAAUA,EAJjB,GAAI7N,GAAUV,EAAoB,EAQlC2qC,GAAO13B,UAAU04B,UAAY,SAASC,GAGpC,IAAK,GAFDlwB,GAAOkwB,EAAU,GAAG95B,EACpB8J,EAAOgwB,EAAU,GAAG95B,EACf4Z,EAAI,EAAGA,EAAIkgB,EAAUnmC,OAAQimB,IACpChQ,EAAOA,EAAOkwB,EAAUlgB,GAAG5Z,EAAI85B,EAAUlgB,GAAG5Z,EAAI4J,EAChDE,EAAOA,EAAOgwB,EAAUlgB,GAAG5Z,EAAI85B,EAAUlgB,GAAG5Z,EAAI8J,CAElD,QAAQhQ,IAAK8P,EAAMlP,IAAKoP,EAAM8vB,iBAAkB5rC,KAAKyO,QAAQm9B,mBAG/Df,EAAO13B,UAAU44B,KAAO,SAAS/U,EAAS/kB,EAAO+5B,EAAWriB,GAC1DkhB,EAAOkB,KAAK/U,EAAS/kB,EAAO+5B,EAAWriB,IAYzCkhB,EAAOkB,KAAO,SAAU/U,EAAS/kB,EAAO+5B,EAAWriB,GAClCnjB,SAAXmjB,IAAuBA,EAAS,EACpC,KAAK,GAAInkB,GAAI,EAAGA,EAAIwxB,EAAQrxB,OAAQH,IAClC5E,EAAQkR,UAAUklB,EAAQxxB,GAAGuM,EAAI4X,EAAQqN,EAAQxxB,GAAGwM,EAAGC,EAAO+5B,EAAU/E,YAAa+E,EAAUpG,MAKnG/lC,EAAOD,QAAUirC,GAIb,SAAShrC,EAAQD,EAASM,GAE9B,GAAIssE,GAAetsE,EAAoB,IACnCusE,EAAevsE,EAAoB,IACnCwsE,EAAexsE,EAAoB,IACnCysE,EAAiBzsE,EAAoB,IACrC0sE,EAAoB1sE,EAAoB,IACxC2sE,EAAkB3sE,EAAoB,IACtC4sE,EAA0B5sE,EAAoB,GAQlDN,GAAQmtE,WAAa,SAAUC,GAC7B,IAAK,GAAIC,KAAiBD,GACpBA,EAAelnE,eAAemnE,KAChCjtE,KAAKitE,GAAiBD,EAAeC,KAY3CrtE,EAAQstE,YAAc,SAAUF,GAC9B,IAAK,GAAIC,KAAiBD,GACpBA,EAAelnE,eAAemnE,KAChCjtE,KAAKitE,GAAiBzmE,SAW5B5G,EAAQqjD,mBAAqB,WAC3BjjD,KAAK+sE,WAAWP,GAChBxsE,KAAKmtE,2BACkC,GAAnCntE,KAAK0hD,UAAUnD,iBACjBv+C,KAAKotE,4BAGLptE,KAAKoqD,gCAUTxqD,EAAQujD,mBAAqB,WAC3BnjD,KAAK87D,eAAiB,EACtB97D,KAAKqtE,aAAe,EACpBrtE,KAAK+sE,WAAWN,IASlB7sE,EAAQsjD,kBAAoB,WAC1BljD,KAAKkvD,WACLlvD,KAAKstE,cAAgB,WACrBttE,KAAKkvD,QAAgB,UACrBlvD,KAAKkvD,QAAgB,OAAE,YAAclS,SACnCc,SACAgG,
eACAsY,eAAkB,EAClBmR,YAAe/mE,QACjBxG,KAAKkvD,QAAgB,UACrBlvD,KAAKkvD,QAAiB,SAAKlS,SACzBc,SACAgG,eACAsY,eAAkB,EAClBmR,YAAe/mE,QAEjBxG,KAAK8jD,YAAc9jD,KAAKkvD,QAAgB,OAAE,WAAwB,YAElElvD,KAAK+sE,WAAWL,IASlB9sE,EAAQwjD,qBAAuB,WAC7BpjD,KAAKkrD,cAAgBlO,SAAWc,UAEhC99C,KAAK+sE,WAAWJ,IASlB/sE,EAAQ6oD,wBAA0B,WAEhCzoD,KAAKwtE,8BAA+B,EACpCxtE,KAAKytE,sBAAuB,EAEmB,GAA3CztE,KAAK0hD,UAAUnB,iBAAiB7xC,SAELlI,SAAzBxG,KAAK0tE,kBACP1tE,KAAK0tE,gBAAkBn8D,SAASM,cAAc,OAC9C7R,KAAK0tE,gBAAgB7lE,UAAY,0BAE/B7H,KAAK0tE,gBAAgBzgE,MAAM26B,QADR,GAAjB5nC,KAAKkoD,SAC8B,QAGA,OAEvCloD,KAAKsf,MAAM7N,YAAYzR,KAAK0tE,kBAGLlnE,SAArBxG,KAAK2tE,cACP3tE,KAAK2tE,YAAcp8D,SAASM,cAAc,OAC1C7R,KAAK2tE,YAAY9lE,UAAY,gCAE3B7H,KAAK2tE,YAAY1gE,MAAM26B,QADJ,GAAjB5nC,KAAKkoD,SAC0B,OAGA,QAEnCloD,KAAKsf,MAAM7N,YAAYzR,KAAK2tE,cAGRnnE,SAAlBxG,KAAK4tE,WACP5tE,KAAK4tE,SAAWr8D,SAASM,cAAc,OACvC7R,KAAK4tE,SAAS/lE,UAAY,gCAC1B7H,KAAK4tE,SAAS3gE,MAAM26B,QAAU5nC,KAAK0tE,gBAAgBzgE,MAAM26B,QACzD5nC,KAAKsf,MAAM7N,YAAYzR,KAAK4tE,WAI9B5tE,KAAK+sE,WAAWH,GAGhB5sE,KAAKmnD,yBAGwB3gD,SAAzBxG,KAAK0tE,kBAEP1tE,KAAKmnD,wBAGLnnD,KAAKsf,MAAMnO,YAAYnR,KAAK0tE,iBAC5B1tE,KAAKsf,MAAMnO,YAAYnR,KAAK2tE,aAC5B3tE,KAAKsf,MAAMnO,YAAYnR,KAAK4tE,UAE5B5tE,KAAK0tE,gBAAkBlnE,OACvBxG,KAAK2tE,YAAcnnE,OACnBxG,KAAK4tE,SAAWpnE,OAEhBxG,KAAKktE,YAAYN,KAWvBhtE,EAAQ4oD,wBAA0B,WAChCxoD,KAAK+sE,WAAWF,GAEhB7sE,KAAK6tE,mBACoC,GAArC7tE,KAAK0hD,UAAUtB,WAAW1xC,SAC5B1O,KAAK8tE,2BAUTluE,EAAQyjD,qBAAuB,WAC7BrjD,KAAK+sE,WAAWD,KAMd,SAASjtE,EAAQD,EAASM,GAiB9B,QAASolD,GAAU/rC,GACjBvZ,KAAKszD,QAAS,EAEdtzD,KAAK+vB,KACHxW,UAAWA,GAGbvZ,KAAK+vB,IAAIg+C,QAAUx8D,SAASM,cAAc,OAC1C7R,KAAK+vB,IAAIg+C,QAAQlmE,UAAY,UAE7B7H,KAAK+vB,IAAIxW,UAAU9H,YAAYzR,KAAK+vB,IAAIg+C,SAExC/tE,KAAK8D,OAASC,EAAO/D,KAAK+vB,IAAIg+C,SAAUC,iBAAiB,IACzDhuE,KAAK8D,OAAOyP,GAAG,MAAOvT,KAAKiuE,cAAcn5C,KAAK90B,MAG9C,IAAImU,GAAKnU,KACLwlE,GACF,QAAS,QACT,YAAa,OACb,YAAa,OAAQ,UACrB,aAAc,iBAEhBA,GAAOn9D,QAAQ,SAAUiB,GACvB6K,EAAGrQ,OAAOyP,GAAGjK,EAAO,SAAUA,GAC5BA,EAAMq8B,sBAKV3lC,KAAKkuE,aAAenqE,EAAOwF,QAASykE,iBAAiB,IACrDhuE,KAAKkuE,aAAa36D,GAAG,MAAO,SAAUjK,GAE/B6kE,EAAW7kE,EAAMI,OAAQ6P,IAC5BpF,EAAGi6D,eAIe5nE,SAAlBxG,KAAKmlD,UACPnlD,KAAKmlD,SAAS7xC,UAEhBtT,KAAKmlD,SAAWA,IAGhBnlD,KAAKquE,YAAcruE,KAAKouE,WAAWt5C,KAAK90B,MAiF1C,QAASmuE,GAAWvlE,EAASk8B,GAC3B,KAAOl8B,GAAS,CACd,GAAIA,IAAYk8B,EACd,OAAO,CAETl8B,GAAUA,EAAQiB,WAEpB,OAAO,EAnJT,GAAIs7C,GAAWjlD,EAAoB,IAC/B6c,EAAU7c,EAAoB,IAC9B6D,EAAS7D,EAAoB,IAC7BS,EAAOT,EAAoB,EA4D/B6c,GAAQuoC,EAAUnyC,WAGlBmyC,EAAUxrB,QAAU,KAKpBwrB,EAAUnyC,UAAUG,QAAU,WAC5BtT,KAAKouE,aAGLpuE,KAAK+vB,IAAIg+C,QAAQlkE,WAAWsH,YAAYnR,KAAK+vB,IAAIg+C,SAGjD/tE,KAAK8D,OAAS,KACd9D,KAAKkuE,aAAe,MAQtB5oB,EAAUnyC,UAAUm7D,SAAW,WAEzBhpB,EAAUxrB,SACZwrB,EAAUxrB,QAAQs0C,aAEpB9oB,EAAUxrB,QAAU95B,KAEpBA,KAAKszD,QAAS,EACdtzD,KAAK+vB,IAAIg+C,QAAQ9gE,MAAM26B,QAAU,OACjCjnC,EAAKiH,aAAa5H,KAAK+vB,IAAIxW,UAAW,cAEtCvZ,KAAK4tB,KAAK,UACV5tB,KAAK4tB,KAAK,YAIV5tB,KAAKmlD,SAASrwB,KAAK,MAAO90B,KAAKquE,cAOjC/oB,EAAUnyC,UAAUi7D,WAAa,WAC/BpuE,KAAKszD,QAAS,EACdtzD,KAAK+vB,IAAIg+C,QAAQ9gE,MAAM26B,QAAU,GACjCjnC,EAAKuH,gBAAgBlI,KAAK+vB,IAAIxW,UAAW,cACzCvZ,KAAKmlD,SAASopB,OAAO,MAAOvuE,KAAKquE,aAEjCruE,KAAK4tB,KAAK,UACV5tB,KAAK4tB,KAAK,eAQZ03B,EAAUnyC,UAAU86D,cAAgB,SAAU3kE,GAE5CtJ,KAAKsuE,WACLhlE,EAAMq8B,mBAsBR9lC,EAAOD,QAAU0lD,GAKb,SAASzlD,GAeb,QAASkd,GAAQgG,GACf,MAAIA,GAAYqvC,EAAMrvC,GAAtB,OAWF,QAASqvC,GAAMrvC,GACb,IAAK,GAAIra,KAAOqU,GAAQ5J,UACtB4P,EAAIra,GAAOqU,EAAQ5J,UAAUzK,EAE/B,OAAOqa,GAxBTljB,EAAOD,QAAUmd,EAoCjBA,EAAQ5J,UAAUI,GAClBwJ,EAAQ5J,UAAUxK,iBAAmB,SAASW,EAAO4P,GAInD,MAHAlZ,MAAKwuE,WAAaxuE,KAAKwuE,gBACtBxuE,KAAKwuE,WAAWllE,GAAStJ,KAAKwuE,WAAWllE,QACvCtB,KAAKkR,GACDl
AAS,WACLtT,KAAKyxF,MAAQ9E,EAAqB3sF,KAAK4I,QAAS5I,KAAKyxF,KAAMzxF,KAAKkuF,YAChEluF,KAAKoyF,UAAYzF,EAAqB3sF,KAAK0J,OAAQ1J,KAAKoyF,SAAUpyF,KAAKkuF,YACvEluF,KAAK2xF,OAAShF,EAAqBe,EAAoB1tF,KAAK4I,SAAU5I,KAAK2xF,MAAO3xF,KAAKkuF,aAoT/F,IAAI8I,KACAC,UAAW7H,GACX8H,UAAWlE,GACXmE,QAAS9H,IAGTqC,GAAuB,YACvBE,GAAsB,mBAiB1B7F,GAAQ+C,EAAYf,GAKhB1C,QAAS,SAAmB8C,GACxB,GAAIY,GAAYiI,GAAgB7I,EAAGrnF,KAG/BioF,GAAYK,IAA6B,IAAdjB,EAAG1hE,SAC9BzsB,KAAK8xF,SAAU,GAGf/C,EAAYiE,IAA2B,IAAb7E,EAAG3hE,QAC7BuiE,EAAYM,IAIXrvF,KAAK8xF,SAAY9xF,KAAK6xF,QAIvB9C,EAAYM,KACZrvF,KAAK8xF,SAAU,GAGnB9xF,KAAKsI,SAAStI,KAAKguF,QAASe,GACxBE,UAAWd,GACXgB,iBAAkBhB,GAClBiJ,YAAaT,GACb7iD,SAAUq6C,OAKtB,IAAIkJ,KACAC,YAAalI,GACbmI,YAAavE,GACbwE,UAAWnI,GACXoI,cAAenI,GACfoI,WAAYpI,IAIZqI,IACAC,EAAGnB,GACHoB,EAAGnB,GACHoB,EAAGnB,GACHoB,EAAGnB,IAGH7E,GAAyB,cACzBC,GAAwB,qCAGxBzoF,GAAOyuF,iBACPjG,GAAyB,gBACzBC,GAAwB,6CAiB5BjG,EAAQ0C,EAAmBV,GAKvB1C,QAAS,SAAmB8C,GACxB,GAAI8D,GAAQjyF,KAAKiyF,MACbgG,GAAgB,EAEhBC,EAAsB/J,EAAGrnF,KAAKy9B,cAAc/5B,QAAQ,KAAM,IAC1DukF,EAAYsI,GAAkBa,GAC9Bd,EAAcO,GAAuBxJ,EAAGiJ,cAAgBjJ,EAAGiJ,YAE3De,EAAWf,GAAeX,GAG1B2B,EAAapL,EAAQiF,EAAO9D,EAAGkK,UAAW,YAG1CtJ,GAAYK,KAA8B,IAAdjB,EAAG1hE,QAAgB0rE,GAC9B,EAAbC,IACAnG,EAAMjqF,KAAKmmF,GACXiK,EAAanG,EAAMtsF,OAAS,GAEzBopF,GAAaM,GAAYC,MAChC2I,GAAgB,GAIH,EAAbG,IAKJnG,EAAMmG,GAAcjK,EAEpBnuF,KAAKsI,SAAStI,KAAKguF,QAASe,GACxBE,SAAUgD,EACV9C,iBAAkBhB,GAClBiJ,YAAaA,EACbtjD,SAAUq6C,IAGV8J,GAEAhG,EAAM7pF,OAAOgwF,EAAY,MAKrC,IAAIE,KACAC,WAAYnJ,GACZoJ,UAAWxF,GACXyF,SAAUpJ,GACVqJ,YAAapJ,IAGb+C,GAA6B,aAC7BC,GAA6B,2CAejCvG,GAAQoG,EAAkBpE,GACtB1C,QAAS,SAAmB8C,GACxB,GAAIrnF,GAAOwxF,GAAuBnK,EAAGrnF,KAOrC,IAJIA,IAASsoF,KACTpvF,KAAKuyF,SAAU,GAGdvyF,KAAKuyF,QAAV,CAIA,GAAIG,GAAUF,EAAuBjyF,KAAKP,KAAMmuF,EAAIrnF,EAGhDA,IAAQuoF,GAAYC,KAAiBoD,EAAQ,GAAG/sF,OAAS+sF,EAAQ,GAAG/sF,SAAW,IAC/E3F,KAAKuyF,SAAU,GAGnBvyF,KAAKsI,SAAStI,KAAKguF,QAASlnF,GACxBmoF,SAAUyD,EAAQ,GAClBvD,gBAAiBuD,EAAQ,GACzB0E,YAAaX,GACb3iD,SAAUq6C,OAsBtB,IAAIwK,KACAJ,WAAYnJ,GACZoJ,UAAWxF,GACXyF,SAAUpJ,GACVqJ,YAAapJ,IAGbsD,GAAsB,2CAc1B7G,GAAQ4C,EAAYZ,GAChB1C,QAAS,SAAoB8C,GACzB,GAAIrnF,GAAO6xF,GAAgBxK,EAAGrnF,MAC1B4rF,EAAUI,EAAWvyF,KAAKP,KAAMmuF,EAAIrnF,EACnC4rF,IAIL1yF,KAAKsI,SAAStI,KAAKguF,QAASlnF,GACxBmoF,SAAUyD,EAAQ,GAClBvD,gBAAiBuD,EAAQ,GACzB0E,YAAaX,GACb3iD,SAAUq6C,OAmFtBpC,EAAQ8C,EAAiBd,GAOrB1C,QAAS,SAAoB2C,EAAS4K,EAAYC,GAC9C,GAAIV,GAAWU,EAAUzB,aAAeX,GACpCqC,EAAWD,EAAUzB,aAAeT,EAIxC,IAAIwB,EACAn4F,KAAKmzF,MAAMtB,OAAQ,MAChB,IAAIiH,IAAY94F,KAAKmzF,MAAMtB,MAC9B,MAIA+G,IAAcvJ,GAAYC,MAC1BtvF,KAAKmzF,MAAMtB,OAAQ,GAGvB7xF,KAAKsI,SAAS0lF,EAAS4K,EAAYC,IAMvCvlF,QAAS,WACLtT,KAAK89B,MAAMxqB,UACXtT,KAAKmzF,MAAM7/E,YAInB,IAAIylF,IAAwB5L,EAASoJ,GAAatpF,MAAO,eACrD+rF,GAAsBD,KAA0BvyF,EAGhDyyF,GAAuB,UACvBpF,GAAoB,OACpBD,GAA4B,eAC5BL,GAAoB,OACpBE,GAAqB,QACrBE,GAAqB,OAczBP,GAAYjgF,WAKRg2C,IAAK,SAAS9hD,GAENA,GAAS4xF,KACT5xF,EAAQrH,KAAKk5F,WAGbF,KACAh5F,KAAKguF,QAAQplF,QAAQqE,MAAM8rF,IAAyB1xF,GAExDrH,KAAKszF,QAAUjsF,EAAMk9B,cAAcr3B,QAMvC2H,OAAQ,WACJ7U,KAAKmpD,IAAInpD,KAAKguF,QAAQv/E,QAAQ62D,cAOlC4zB,QAAS,WACL,GAAI5F,KAMJ,OALA1H,GAAK5rF,KAAKguF,QAAQ0H,YAAa,SAASd,GAChCxI,EAASwI,EAAWnmF,QAAQ20B,QAASwxD,MACrCtB,EAAUA,EAAQt/E,OAAO4gF,EAAWuE,qBAGrC9F,EAAkBC,EAAQrrF,KAAK,OAO1CmxF,gBAAiB,SAAStkB,GAEtB,IAAIkkB,GAAJ,CAIA,GAAIllD,GAAWghC,EAAMhhC,SACjB5Y,EAAY45C,EAAMub,eAGtB,IAAIrwF,KAAKguF,QAAQuB,QAAQ8J,UAErB,WADAvlD,GAASzqC,gBAIb,IAAIiqF,GAAUtzF,KAAKszF,QACfgG,EAAUzM,EAAMyG,EAASC,IACzBG,EAAU7G,EAAMyG,EAASK,IACzBH,EAAU3G,EAAMyG,EAASG,GAE7B,OAAI6F,IACC5F,GAAWx4D,EAAY27D,IACvBrD,GAAWt4D,EAAY47D,GACjB92F,KAAKu5F,WAAWzlD,GAH3B,SAWJylD,WAAY,SAASzlD,GACjB9zC,KAAKguF,QAAQuB,QAAQ8J,W
AAY,EACjCvlD,EAASzqC,kBA+DjB,IAAI4qF,IAAiB,EACjBO,GAAc,EACdD,GAAgB,EAChBD,GAAc,EACdkF,GAAmBlF,GACnBD,GAAkB,GAClBoF,GAAe,EAuBnB3F,GAAW3gF,WAKP4gF,YAOA5qC,IAAK,SAAS16C,GAKV,MAJAnJ,GAAOtF,KAAKyO,QAASA,GAGrBzO,KAAKguF,SAAWhuF,KAAKguF,QAAQ1oB,YAAYzwD,SAClC7U,MAQX81F,cAAe,SAASnB,GACpB,GAAIjJ,EAAeiJ,EAAiB,gBAAiB30F,MACjD,MAAOA,KAGX,IAAIk0F,GAAel0F,KAAKk0F,YAMxB,OALAS,GAAkBD,GAA6BC,EAAiB30F,MAC3Dk0F,EAAaS,EAAgBt0F,MAC9B6zF,EAAaS,EAAgBt0F,IAAMs0F,EACnCA,EAAgBmB,cAAc91F,OAE3BA,MAQX05F,kBAAmB,SAAS/E,GACxB,MAAIjJ,GAAeiJ,EAAiB,oBAAqB30F,MAC9CA,MAGX20F,EAAkBD,GAA6BC,EAAiB30F,YACzDA,MAAKk0F,aAAaS,EAAgBt0F,IAClCL,OAQX+1F,eAAgB,SAASpB,GACrB,GAAIjJ,EAAeiJ,EAAiB,iBAAkB30F,MAClD,MAAOA,KAGX,IAAIm0F,GAAcn0F,KAAKm0F,WAMvB,OALAQ,GAAkBD,GAA6BC,EAAiB30F,MAClB,KAA1CgtF,EAAQmH,EAAaQ,KACrBR,EAAYnsF,KAAK2sF,GACjBA,EAAgBoB,eAAe/1F,OAE5BA,MAQX25F,mBAAoB,SAAShF,GACzB,GAAIjJ,EAAeiJ,EAAiB,qBAAsB30F,MACtD,MAAOA,KAGX20F,GAAkBD,GAA6BC,EAAiB30F,KAChE,IAAImI,GAAQ6kF,EAAQhtF,KAAKm0F,YAAaQ,EAItC,OAHIxsF,GAAQ,IACRnI,KAAKm0F,YAAY/rF,OAAOD,EAAO,GAE5BnI,MAOX45F,mBAAoB,WAChB,MAAO55F,MAAKm0F,YAAYxuF,OAAS,GAQrCk0F,iBAAkB,SAASlF,GACvB,QAAS30F,KAAKk0F,aAAaS,EAAgBt0F,KAQ/CutB,KAAM,SAASknD,GAIX,QAASlnD,GAAKksE,GACVprB,EAAKsf,QAAQpgE,KAAK8gD,EAAKjgE,QAAQnF,OAASwwF,EAAY1F,EAASJ,GAAS,IAAKlf,GAJ/E,GAAIpG,GAAO1uE,KACPg0F,EAAQh0F,KAAKg0F,KAOLM,IAARN,GACApmE,GAAK,GAGTA,IAGIomE,GAASM,IACT1mE,GAAK,IAUbmsE,QAAS,SAASjlB,GACd,MAAI90E,MAAKg6F,UACEh6F,KAAK4tB,KAAKknD,QAGrB90E,KAAKg0F,MAAQyF,KAOjBO,QAAS,WAEL,IADA,GAAIx0F,GAAI,EACDA,EAAIxF,KAAKm0F,YAAYxuF,QAAQ,CAChC,KAAM3F,KAAKm0F,YAAY3uF,GAAGwuF,OAASyF,GAAexF,KAC9C,OAAO,CAEXzuF,KAEJ,OAAO,GAOXiqF,UAAW,SAASoJ,GAGhB,GAAIoB,GAAiB30F,KAAWuzF,EAGhC,OAAKzM,GAASpsF,KAAKyO,QAAQ20B,QAASpjC,KAAMi6F,KAOtCj6F,KAAKg0F,OAASwF,GAAmBnF,GAAkBoF,MACnDz5F,KAAKg0F,MAAQC,IAGjBj0F,KAAKg0F,MAAQh0F,KAAKk6F,QAAQD,QAItBj6F,KAAKg0F,OAASQ,GAAcD,GAAgBD,GAAcD,KAC1Dr0F,KAAK+5F,QAAQE,MAfbj6F,KAAKwpD,aACLxpD,KAAKg0F,MAAQyF,MAyBrBS,QAAS,aAOTf,eAAgB,aAOhB3vC,MAAO,cA8DXuiC,EAAQ8I,GAAgBf,GAKpBC,UAKI9E,SAAU,GASdkL,SAAU,SAASrlB,GACf,GAAIslB,GAAiBp6F,KAAKyO,QAAQwgF,QAClC,OAA0B,KAAnBmL,GAAwBtlB,EAAMma,SAAStpF,SAAWy0F,GAS7DF,QAAS,SAASplB,GACd,GAAIkf,GAAQh0F,KAAKg0F,MACbjF,EAAYja,EAAMia,UAElBsL,EAAerG,GAASQ,GAAcD,IACtCld,EAAUr3E,KAAKm6F,SAASrlB,EAG5B,OAAIulB,KAAiBtL,EAAYO,KAAiBjY,GACvC2c,EAAQK,GACRgG,GAAgBhjB,EACnB0X,EAAYM,GACL2E,EAAQM,GACNN,EAAQQ,GAGdR,EAAQO,GAFJC,GAIRiF,MAiBf1N,EAAQ+I,GAAeD,IAKnBd,UACIzqF,MAAO,MACP0uD,UAAW,GACXi3B,SAAU,EACV/zD,UAAW67D,IAGfoC,eAAgB,WACZ,GAAIj+D,GAAYl7B,KAAKyO,QAAQysB,UACzBo4D,IAOJ,OANIp4D,GAAY27D,IACZvD,EAAQtrF,KAAK2rF,IAEbz4D,EAAY47D,IACZxD,EAAQtrF,KAAKyrF,IAEVH,GAGXgH,cAAe,SAASxlB,GACpB,GAAIrmE,GAAUzO,KAAKyO,QACf8rF,GAAW,EACX50E,EAAWmvD,EAAMnvD,SACjBuV,EAAY45C,EAAM55C,UAClBnpB,EAAI+iE,EAAMt1C,OACVxtB,EAAI8iE,EAAMr1C,MAed,OAZMvE,GAAYzsB,EAAQysB,YAClBzsB,EAAQysB,UAAY27D,IACpB37D,EAAmB,IAANnpB,EAAWm/E,GAAsB,EAAJn/E,EAASo/E,GAAiBC,GACpEmJ,EAAWxoF,GAAK/R,KAAK+0F,GACrBpvE,EAAWzgB,KAAK2lB,IAAIiqD,EAAMt1C,UAE1BtE,EAAmB,IAANlpB,EAAWk/E,GAAsB,EAAJl/E,EAASq/E,GAAeC,GAClEiJ,EAAWvoF,GAAKhS,KAAKg1F,GACrBrvE,EAAWzgB,KAAK2lB,IAAIiqD,EAAMr1C,UAGlCq1C,EAAM55C,UAAYA,EACXq/D,GAAY50E,EAAWlX,EAAQupD,WAAa98B,EAAYzsB,EAAQysB,WAG3Ei/D,SAAU,SAASrlB,GACf,MAAO+f,IAAe1hF,UAAUgnF,SAAS55F,KAAKP,KAAM80E,KAC/C90E,KAAKg0F,MAAQQ,MAAkBx0F,KAAKg0F,MAAQQ,KAAgBx0F,KAAKs6F,cAAcxlB,KAGxFlnD,KAAM,SAASknD,GACX90E,KAAK+0F,GAAKjgB,EAAMt1C,OAChBx/B,KAAKg1F,GAAKlgB,EAAMr1C,MAEhB,IAAIvE,GAAYu5D,EAAa3f,EAAM55C,UAC/BA,IACAl7B,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAQ4xB,EAAW45C,GAGtD90E,KAAKmsF,OAAOv+D,KAAKrtB,KAAKP,KAAM80E,MAcpCiX,EAAQkJ,GAAiBJ,IAKrBd,UACIzqF,M
AAO,QACP0uD,UAAW,EACXi3B,SAAU,GAGdkK,eAAgB,WACZ,OAAQ5F,KAGZ4G,SAAU,SAASrlB,GACf,MAAO90E,MAAKmsF,OAAOgO,SAAS55F,KAAKP,KAAM80E,KAClC5vE,KAAK2lB,IAAIiqD,EAAM73D,MAAQ,GAAKjd,KAAKyO,QAAQupD,WAAah4D,KAAKg0F,MAAQQ,KAG5E5mE,KAAM,SAASknD,GAEX,GADA90E,KAAKmsF,OAAOv+D,KAAKrtB,KAAKP,KAAM80E,GACR,IAAhBA,EAAM73D,MAAa,CACnB,GAAIu9E,GAAQ1lB,EAAM73D,MAAQ,EAAI,KAAO,KACrCjd,MAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAQkxF,EAAO1lB,OAkB1DiX,EAAQmJ,GAAiBpB,GAKrBC,UACIzqF,MAAO,QACP2lF,SAAU,EACV90D,KAAM,IACN69B,UAAW,GAGfmhC,eAAgB,WACZ,OAAQtF,KAGZqG,QAAS,SAASplB,GACd,GAAIrmE,GAAUzO,KAAKyO,QACfgsF,EAAgB3lB,EAAMma,SAAStpF,SAAW8I,EAAQwgF,SAClDyL,EAAgB5lB,EAAMnvD,SAAWlX,EAAQupD,UACzC2iC,EAAY7lB,EAAMob,UAAYzhF,EAAQ0rB,IAM1C,IAJAn6B,KAAKo1F,OAAStgB,GAIT4lB,IAAkBD,GAAkB3lB,EAAMia,WAAaM,GAAYC,MAAkBqL,EACtF36F,KAAKwpD,YACF,IAAIsrB,EAAMia,UAAYK,GACzBpvF,KAAKwpD,QACLxpD,KAAKm1F,OAAS5J,EAAkB,WAC5BvrF,KAAKg0F,MAAQwF,GACbx5F,KAAK+5F;EACNtrF,EAAQ0rB,KAAMn6B,UACd,IAAI80E,EAAMia,UAAYM,GACzB,MAAOmK,GAEX,OAAOC,KAGXjwC,MAAO,WACHnwC,aAAarZ,KAAKm1F,SAGtBvnE,KAAM,SAASknD,GACP90E,KAAKg0F,QAAUwF,KAIf1kB,GAAUA,EAAMia,UAAYM,GAC5BrvF,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAQ,KAAMwrE,IAE7C90E,KAAKo1F,OAAOnF,UAAY9yD,KACxBn9B,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAOtJ,KAAKo1F,aAevDrJ,EAAQsJ,GAAkBR,IAKtBd,UACIzqF,MAAO,SACP0uD,UAAW,EACXi3B,SAAU,GAGdkK,eAAgB,WACZ,OAAQ5F,KAGZ4G,SAAU,SAASrlB,GACf,MAAO90E,MAAKmsF,OAAOgO,SAAS55F,KAAKP,KAAM80E,KAClC5vE,KAAK2lB,IAAIiqD,EAAMyb,UAAYvwF,KAAKyO,QAAQupD,WAAah4D,KAAKg0F,MAAQQ,OAc/EzI,EAAQuJ,GAAiBT,IAKrBd,UACIzqF,MAAO,QACP0uD,UAAW,GACXwG,SAAU,IACVtjC,UAAW27D,GAAuBC,GAClC7H,SAAU,GAGdkK,eAAgB,WACZ,MAAOrE,IAAc3hF,UAAUgmF,eAAe54F,KAAKP,OAGvDm6F,SAAU,SAASrlB,GACf,GACItW,GADAtjC,EAAYl7B,KAAKyO,QAAQysB,SAW7B,OARIA,IAAa27D,GAAuBC,IACpCt4B,EAAWsW,EAAMtW,SACVtjC,EAAY27D,GACnBr4B,EAAWsW,EAAM8b,UACV11D,EAAY47D,KACnBt4B,EAAWsW,EAAM+b,WAGd7wF,KAAKmsF,OAAOgO,SAAS55F,KAAKP,KAAM80E,IACnC55C,EAAY45C,EAAM55C,WAClB45C,EAAMnvD,SAAW3lB,KAAKyO,QAAQupD,WAC9BntC,GAAI2zC,GAAYx+D,KAAKyO,QAAQ+vD,UAAYsW,EAAMia,UAAYM,IAGnEzhE,KAAM,SAASknD,GACX,GAAI55C,GAAYu5D,EAAa3f,EAAM55C,UAC/BA,IACAl7B,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAQ4xB,EAAW45C,GAGtD90E,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAOwrE,MA2B9CiX,EAAQwJ,GAAezB,GAKnBC,UACIzqF,MAAO,MACP2lF,SAAU,EACV2L,KAAM,EACNpoE,SAAU,IACV2H,KAAM,IACN69B,UAAW,EACX6iC,aAAc,IAGlB1B,eAAgB,WACZ,OAAQvF,KAGZsG,QAAS,SAASplB,GACd,GAAIrmE,GAAUzO,KAAKyO,QAEfgsF,EAAgB3lB,EAAMma,SAAStpF,SAAW8I,EAAQwgF,SAClDyL,EAAgB5lB,EAAMnvD,SAAWlX,EAAQupD,UACzC8iC,EAAiBhmB,EAAMob,UAAYzhF,EAAQ0rB,IAI/C,IAFAn6B,KAAKwpD,QAEAsrB,EAAMia,UAAYK,IAAgC,IAAfpvF,KAAKgX,MACzC,MAAOhX,MAAK+6F,aAKhB,IAAIL,GAAiBI,GAAkBL,EAAe,CAClD,GAAI3lB,EAAMia,WAAaM,GACnB,MAAOrvF,MAAK+6F,aAGhB,IAAIC,GAAgBh7F,KAAKw1F,MAAS1gB,EAAMmb,UAAYjwF,KAAKw1F,MAAQ/mF,EAAQ+jB,UAAY,EACjFyoE,GAAiBj7F,KAAKy1F,SAAWh3B,EAAYz+D,KAAKy1F,QAAS3gB,EAAM5oD,QAAUzd,EAAQosF,YAEvF76F,MAAKw1F,MAAQ1gB,EAAMmb,UACnBjwF,KAAKy1F,QAAU3gB,EAAM5oD,OAEhB+uE,GAAkBD,EAGnBh7F,KAAKgX,OAAS,EAFdhX,KAAKgX,MAAQ,EAKjBhX,KAAKo1F,OAAStgB,CAId,IAAIomB,GAAWl7F,KAAKgX,MAAQvI,EAAQmsF,IACpC,IAAiB,IAAbM,EAGA,MAAKl7F,MAAK45F,sBAGN55F,KAAKm1F,OAAS5J,EAAkB,WAC5BvrF,KAAKg0F,MAAQwF,GACbx5F,KAAK+5F,WACNtrF,EAAQ+jB,SAAUxyB,MACdw0F,IANAgF,GAUnB,MAAOC,KAGXsB,YAAa,WAIT,MAHA/6F,MAAKm1F,OAAS5J,EAAkB,WAC5BvrF,KAAKg0F,MAAQyF,IACdz5F,KAAKyO,QAAQ+jB,SAAUxyB,MACnBy5F,IAGXjwC,MAAO,WACHnwC,aAAarZ,KAAKm1F,SAGtBvnE,KAAM,WACE5tB,KAAKg0F,OAASwF,KACdx5F,KAAKo1F,OAAO8F,SAAWl7F,KAAKgX,MAC5BhX,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAOtJ,KAAKo1F,YAoBvDrxF,GAAOm8E,QAAU,QAMjBn8E,GAAOgwF,UAOHoH,WAAW,EAQX71B,YAAa2zB,GAMb71D,QAAQ,EASR6qD,YAAa,
KAObM,WAAY,KAOZoH,SAEKN,IAAoBjyD,QAAQ,KAC5B6xD,IAAmB7xD,QAAQ,IAAU,YACrCkyD,IAAkBp6D,UAAW27D,MAC7B/B,IAAiB55D,UAAW27D,KAAyB,WACrDtB,KACAA,IAAiBjsF,MAAO,YAAasxF,KAAM,IAAM,SACjD1F,KAQLc,UAMIoF,WAAY,OAOZC,YAAa,OASbC,aAAc,OAOdC,eAAgB,OAOhBC,SAAU,OAQVC,kBAAmB,iBAI3B,IAAIC,IAAO,EACPC,GAAc,CA+BlB3Q,IAAQ73E,WAMJg2C,IAAK,SAAS16C,GAaV,MAZAnJ,GAAOtF,KAAKyO,QAASA,GAGjBA,EAAQ62D,aACRtlE,KAAKslE,YAAYzwD,SAEjBpG,EAAQw/E,cAERjuF,KAAK80E,MAAMxhE,UACXtT,KAAK80E,MAAMprE,OAAS+E,EAAQw/E,YAC5BjuF,KAAK80E,MAAMsZ,QAERpuF,MASXklB,KAAM,SAAS0b,GACX5gC,KAAKuvF,QAAQ1E,QAAUjqD,EAAQ+6D,GAAcD,IASjDjM,UAAW,SAASoJ,GAChB,GAAItJ,GAAUvvF,KAAKuvF,OACnB,KAAIA,EAAQ1E,QAAZ,CAKA7qF,KAAKslE,YAAY8zB,gBAAgBP,EAEjC,IAAIjE,GACAc,EAAc11F,KAAK01F,YAKnBkG,EAAgBrM,EAAQqM,gBAIvBA,GAAkBA,GAAiBA,EAAc5H,MAAQwF,MAC1DoC,EAAgBrM,EAAQqM,cAAgB,KAI5C,KADA,GAAIp2F,GAAI,EACDA,EAAIkwF,EAAY/vF,QACnBivF,EAAac,EAAYlwF,GAQrB+pF,EAAQ1E,UAAY8Q,IACfC,GAAiBhH,GAAcgH,IAChChH,EAAWiF,iBAAiB+B,GAGhChH,EAAWprC,QAFXorC,EAAWnF,UAAUoJ,IAOpB+C,GAAiBhH,EAAWZ,OAASQ,GAAcD,GAAgBD,MACpEsH,EAAgBrM,EAAQqM,cAAgBhH,GAE5CpvF,MASR0P,IAAK,SAAS0/E,GACV,GAAIA,YAAsBd,GACtB,MAAOc,EAIX,KAAK,GADDc,GAAc11F,KAAK01F,YACdlwF,EAAI,EAAGA,EAAIkwF,EAAY/vF,OAAQH,IACpC,GAAIkwF,EAAYlwF,GAAGiJ,QAAQnF,OAASsrF,EAChC,MAAOc,GAAYlwF,EAG3B,OAAO,OASXyN,IAAK,SAAS2hF,GACV,GAAIlJ,EAAekJ,EAAY,MAAO50F,MAClC,MAAOA,KAIX,IAAI+hD,GAAW/hD,KAAKkV,IAAI0/E,EAAWnmF,QAAQnF,MAS3C,OARIy4C,IACA/hD,KAAKqW,OAAO0rC,GAGhB/hD,KAAK01F,YAAY1tF,KAAK4sF,GACtBA,EAAW5G,QAAUhuF,KAErBA,KAAKslE,YAAYzwD,SACV+/E,GAQXv+E,OAAQ,SAASu+E,GACb,GAAIlJ,EAAekJ,EAAY,SAAU50F,MACrC,MAAOA,KAGX,IAAI01F,GAAc11F,KAAK01F,WAKvB,OAJAd,GAAa50F,KAAKkV,IAAI0/E,GACtBc,EAAYttF,OAAO4kF,EAAQ0I,EAAad,GAAa,GAErD50F,KAAKslE,YAAYzwD,SACV7U,MASXuT,GAAI,SAASiyD,EAAQ6lB,GACjB,GAAIuK,GAAW51F,KAAK41F,QAKpB,OAJAhK,GAAKc,EAASlnB,GAAS,SAASl8D,GAC5BssF,EAAStsF,GAASssF,EAAStsF,OAC3BssF,EAAStsF,GAAOtB,KAAKqjF,KAElBrrF,MASX0T,IAAK,SAAS8xD,EAAQ6lB,GAClB,GAAIuK,GAAW51F,KAAK41F,QAQpB,OAPAhK,GAAKc,EAASlnB,GAAS,SAASl8D,GACvB+hF,EAGDuK,EAAStsF,GAAOlB,OAAO4kF,EAAQ4I,EAAStsF,GAAQ+hF,GAAU,SAFnDuK,GAAStsF,KAKjBtJ,MAQX4tB,KAAM,SAAStkB,EAAOoJ,GAEd1S,KAAKyO,QAAQ0sF,WACblF,GAAgB3sF,EAAOoJ,EAI3B,IAAIkjF,GAAW51F,KAAK41F,SAAStsF,IAAUtJ,KAAK41F,SAAStsF,GAAO2B,OAC5D,IAAK2qF,GAAaA,EAASjwF,OAA3B,CAIA+M,EAAK5L,KAAOwC,EACZoJ,EAAKrJ,eAAiB,WAClBqJ,EAAKohC,SAASzqC,iBAIlB,KADA,GAAI7D,GAAI,EACDA,EAAIowF,EAASjwF,QAChBiwF,EAASpwF,GAAGkN,GACZlN,MAQR8N,QAAS,WACLtT,KAAK4I,SAAWitF,GAAe71F,MAAM,GAErCA,KAAK41F,YACL51F,KAAKuvF,WACLvvF,KAAK80E,MAAMxhE,UACXtT,KAAK4I,QAAU,OA4BvBtD,EAAOvB,IACHqrF,YAAaA,GACb4D,WAAYA,GACZ3D,UAAWA,GACXC,aAAcA,GAEd2E,eAAgBA,GAChBO,YAAaA,GACbD,cAAeA,GACfD,YAAaA,GACbkF,iBAAkBA,GAClBnF,gBAAiBA,GACjBoF,aAAcA,GAEdvI,eAAgBA,GAChBC,eAAgBA,GAChBC,gBAAiBA,GACjBC,aAAcA,GACdC,eAAgBA,GAChBuF,qBAAsBA,GACtBC,mBAAoBA,GACpBC,cAAeA,GAEf/L,QAASA,GACT+C,MAAOA,EACPqF,YAAaA,EAEbzE,WAAYA,EACZG,WAAYA,EACZL,kBAAmBA,EACnBI,gBAAiBA,EACjBsD,iBAAkBA,EAElB2B,WAAYA,EACZe,eAAgBA,GAChBgH,IAAKtG,GACLuG,IAAKhH,GACLiH,MAAOzG,GACP0G,MAAO/G,GACPgH,OAAQ5G,GACR6G,MAAOhH,GAEP3hF,GAAIk5E,EACJ/4E,IAAKi5E,EACLf,KAAMA,EACN7qB,MAAOA,EACPz7D,OAAQA,EACRymF,QAASA,EACTN,OAAQA,EACR0B,SAAUA,IAGV,YAAcd,IAAiBnsF,EAAoB,KACjD8uE,EAAgC,WAC9B,MAAOjrE,KACTxD,KAAKX,EAASM,EAAqBN,EAASC,KAASmvE,IAAkCxoE,IAAc3G,EAAOD,QAAUovE,KAChG,mBAAVnvE,IAAyBA,EAAOD,QAC9CC,EAAOD,QAAUmE,GAEjBwF,EAAO+hF,GAAcvnF,IAGtBwF,OAAQgI,SAAU,WAKjB,SAAS1R,EAAQD,GAYrBA,EAAQslD,oBAAsB,WAE7BllD,KAAKm8F,aAAan8F,KAAK0hD,UAAUvC,WAAWC,iBAAiB,GAG7Dp/C,KAAKwuD,eAIDxuD,KAAKmhD,WACPnhD,KAAK6nD,aAEP7nD,KAAK4P,SASNhQ,EAAQu8F,aAAe,SAASC,EAAkBC,GAOhD,IANA,GAAI31C,GAAgB1mD,KAAK8jD,YAAYn+C,OAEjC22F
,EAAY,GACZ1+C,EAAQ,EAGL8I,EAAgB01C,GAA4BE,EAAR1+C,GACrCA,EAAQ,GAAK,GACf59C,KAAKu8F,oBAAmB,GACxBv8F,KAAKw8F,0BAGLx8F,KAAKy8F,uBAGP/1C,EAAgB1mD,KAAK8jD,YAAYn+C,OACjCi4C,GAAS,CAIPA,GAAQ,GAAmB,GAAdy+C,GACfr8F,KAAK08F,kBAEP18F,KAAKquD,2BASPzuD,EAAQ+8F,YAAc,SAAS72C,GAC7B,GAAI82C,GAA2B58F,KAAK8kD,MACpC,IAAIgB,EAAKuW,YAAcr8D,KAAK0hD,UAAUvC,WAAWM,iBAAmBz/C,KAAK68F,kBAAkB/2C,KACrE,WAAlB9lD,KAAK88F,WAAqD,GAA3B98F,KAAK8jD,YAAYn+C,QAAc,CAEhE3F,KAAK+8F,WAAWj3C,EAIhB,KAHA,GAAIlI,GAAQ,EAGJ59C,KAAK8jD,YAAYn+C,OAAS3F,KAAK0hD,UAAUvC,WAAWC,iBAA6B,GAARxB,GAC/E59C,KAAKg9F,uBACLp/C,GAAS,MAKX59C,MAAKi9F,mBAAmBn3C,GAAK,GAAM,GAGnC9lD,KAAKgnD,uBACLhnD,KAAKk9F,sBACLl9F,KAAKquD,0BACLruD,KAAKwuD,cAIHxuD,MAAK8kD,QAAU83C,GACjB58F,KAAK4P,SAQThQ,EAAQ4sD,sBAAwB,WACW,GAArCxsD,KAAK0hD,UAAUvC,WAAWzwC,SAC5B1O,KAAKm9F,eAAe,GAAE,GAAM,IAUhCv9F,EAAQ68F,qBAAuB,WAC7Bz8F,KAAKm9F,eAAe,IAAG,GAAM,IAS/Bv9F,EAAQo9F,qBAAuB,WAC7Bh9F,KAAKm9F,eAAe,GAAE,GAAM,IAgB9Bv9F,EAAQu9F,eAAiB,SAASC,EAAcC,EAAUz8D,EAAM08D,GAC9D,GAAIV,GAA2B58F,KAAK8kD,OAChCy4C,EAAgBv9F,KAAK8jD,YAAYn+C,MAGjC3F,MAAKmkD,cAAgBnkD,KAAKid,OAA0B,GAAjBmgF,GACrCp9F,KAAKw9F,kBAIHx9F,KAAKmkD,cAAgBnkD,KAAKid,OAA0B,IAAjBmgF,EAGrCp9F,KAAKy9F,cAAc78D,IAEZ5gC,KAAKmkD,cAAgBnkD,KAAKid,OAA0B,GAAjBmgF,KAC7B,GAATx8D,EAGF5gC,KAAK09F,cAAcL,EAAUz8D,GAI7B5gC,KAAK29F,uBAGT39F,KAAKgnD,uBAGDhnD,KAAK8jD,YAAYn+C,QAAU43F,IAAkBv9F,KAAKmkD,cAAgBnkD,KAAKid,OAA0B,IAAjBmgF,KAClFp9F,KAAK49F,eAAeh9D,GACpB5gC,KAAKgnD,yBAIHhnD,KAAKmkD,cAAgBnkD,KAAKid,OAA0B,IAAjBmgF,KACrCp9F,KAAK69F,eACL79F,KAAKgnD,wBAGPhnD,KAAKmkD,cAAgBnkD,KAAKid,MAG1Bjd,KAAKk9F,sBACLl9F,KAAKwuD,eAGDxuD,KAAK8jD,YAAYn+C,OAAS43F,IAC5Bv9F,KAAK87D,gBAAkB,EAEvB97D,KAAKw8F,2BAGW,GAAdc,GAAsC92F,SAAf82F,IAErBt9F,KAAK8kD,QAAU83C,GACjB58F,KAAK4P,QAIT5P,KAAKquD,2BAMPzuD,EAAQi+F,aAAe,WAErB,GAAIC,GAAkB99F,KAAK+9F,mBACvBD,GAAkB99F,KAAK0hD,UAAUvC,WAAWI,gBAC9Cv/C,KAAKg+F,sBAAsB,EAAIh+F,KAAK0hD,UAAUvC,WAAWI,eAAiBu+C,IAW9El+F,EAAQg+F,eAAiB,SAASh9D,GAChC5gC,KAAKi+F,cACLj+F,KAAKk+F,mBAAmBt9D,GAAM,IAQhChhC,EAAQ28F,mBAAqB,SAASe,GACpC,GAAIV,GAA2B58F,KAAK8kD,OAChCy4C,EAAgBv9F,KAAK8jD,YAAYn+C,MAErC3F,MAAK49F,gBAAe,GAGpB59F,KAAKgnD,uBACLhnD,KAAKk9F,sBACLl9F,KAAKwuD,eAGDxuD,KAAK8jD,YAAYn+C,QAAU43F,IAC7Bv9F,KAAK87D,gBAAkB,IAGP,GAAdwhC,GAAsC92F,SAAf82F,IAErBt9F,KAAK8kD,QAAU83C,GACjB58F,KAAK4P,SAUXhQ,EAAQ+9F,oBAAsB,WAC5B,IAAK,GAAIx3C,KAAUnmD,MAAKg9C,MACtB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIL,GAAO9lD,KAAKg9C,MAAMmJ,EACD,IAAjBL,EAAKma,WACFna,EAAKvzC,MAAMvS,KAAKid,MAAQjd,KAAK0hD,UAAUvC,WAAWO,oBAAsB1/C,KAAKsf,MAAMC,OAAOC,aAC1FsmC,EAAKtzC,OAAOxS,KAAKid,MAAQjd,KAAK0hD,UAAUvC,WAAWO,oBAAsB1/C,KAAKsf,MAAMC,OAAOsF,eAC9F7kB,KAAK28F,YAAY72C,KAc3BlmD,EAAQ89F,cAAgB,SAASL,EAAUz8D,GACzC,IAAK,GAAIp7B,GAAI,EAAGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAAK,CAChD,GAAIsgD,GAAO9lD,KAAKg9C,MAAMh9C,KAAK8jD,YAAYt+C,GACvCxF,MAAKi9F,mBAAmBn3C,EAAKu3C,EAAUz8D,GACvC5gC,KAAKquD,4BAeTzuD,EAAQq9F,mBAAqB,SAASpzF,EAAYwzF,EAAWz8D,EAAOu9D,GAElE,GAAIt0F,EAAWwyD,YAAc,IAEvBxyD,EAAWwyD,YAAcr8D,KAAK0hD,UAAUvC,WAAWM,kBACrD0+C,GAAU,GAEZd,EAAYc,GAAU,EAAOd,EAGzBxzF,EAAWuyD,eAAiBp8D,KAAKid,OAAkB,GAAT2jB,GAE5C,IAAK,GAAIw9D,KAAmBv0F,GAAWyyD,eACrC,GAAIzyD,EAAWyyD,eAAex2D,eAAes4F,GAAkB,CAC7D,GAAIC,GAAYx0F,EAAWyyD,eAAe8hC,EAI7B,IAATx9D,GACEy9D,EAAUviC,gBAAkBjyD,EAAW2yD,gBAAgB3yD,EAAW2yD,gBAAgB72D,OAAO,IACtFw4F,IACLn+F,KAAKs+F,sBAAsBz0F,EAAWu0F,EAAgBf,EAAUz8D,EAAMu9D,GAIpEn+F,KAAK68F,kBAAkBhzF,IACzB7J,KAAKs+F,sBAAsBz0F,EAAWu0F,EAAgBf,EAAUz8D,EAAMu9D,KAwBpFv+F,EAAQ0+F,sBAAwB,SAASz0F,EAAYu0F,EAAiBf,EAAWz8D,EAAOu9D,GACtF,GAAIE,GAAYx0F,EAAWyyD,eAAe8hC,EAG1C,IAAIC,EAAUjiC,eAAiBp8D,KAAKid,OAAkB,GAAT2jB,EAAe,CAE1D5gC,KAAKu+F,eAGLv+F,
KAAKg9C,MAAMohD,GAAmBC,EAG9Br+F,KAAKw+F,uBAAuB30F,EAAWw0F,GAGvCr+F,KAAKy+F,wBAAwB50F,EAAWw0F,GAGxCr+F,KAAK0+F,eAAe70F,GAGpBA,EAAW4E,QAAQwuC,MAAQohD,EAAU5vF,QAAQwuC,KAC7CpzC,EAAWwyD,aAAegiC,EAAUhiC,YACpCxyD,EAAW4E,QAAQ8uC,SAAWr4C,KAAK4G,IAAI9L,KAAK0hD,UAAUvC,WAAWS,YAAa5/C,KAAK0hD,UAAU1E,MAAMO,SAAWv9C,KAAK0hD,UAAUvC,WAAWQ,oBAAoB91C,EAAWwyD,YAAY,IACnLxyD,EAAWgyD,mBAAqBhyD,EAAWslD,aAAaxpD,OAGxD04F,EAAUtsF,EAAIlI,EAAWkI,EAAIlI,EAAWqyD,iBAAmB,GAAMh3D,KAAKE,UACtEi5F,EAAUrsF,EAAInI,EAAWmI,EAAInI,EAAWqyD,iBAAmB,GAAMh3D,KAAKE,gBAG/DyE,GAAWyyD,eAAe8hC,EAGjC,IAAIO,IAAgB,CACpB,KAAK,GAAIC,KAAe/0F,GAAWyyD,eACjC,GAAIzyD,EAAWyyD,eAAex2D,eAAe84F,IACvC/0F,EAAWyyD,eAAesiC,GAAa9iC,gBAAkBuiC,EAAUviC,eAAgB,CACrF6iC,GAAgB,CAChB,OAKe,GAAjBA,GACF90F,EAAW2yD,gBAAgBniB,MAG7Br6C,KAAK6+F,uBAAuBR,GAI5BA,EAAUviC,eAAiB,EAG3BjyD,EAAWs0D,iBAGXn+D,KAAK8kD,QAAS,EAIC,GAAbu4C,GACFr9F,KAAKi9F,mBAAmBoB,EAAUhB,EAAUz8D,EAAMu9D,IAWtDv+F,EAAQi/F,uBAAyB,SAAS/4C,GACxC,IAAK,GAAItgD,GAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAC5CsgD,EAAKqJ,aAAa3pD,GAAG2sD,sBAczBvyD,EAAQ69F,cAAgB,SAAS78D,GAClB,GAATA,EACF5gC,KAAK8+F,sBAGL9+F,KAAK++F,wBAUTn/F,EAAQk/F,oBAAsB,WAC5B,GAAIlgF,GAAGC,EAAGlZ,EACNq5F,EAAYh/F,KAAK0hD,UAAUvC,WAAWK,qBAAqBx/C,KAAKid,KAIpE,KAAK,GAAI+vC,KAAUhtD,MAAK89C,MACtB,GAAI99C,KAAK89C,MAAMh4C,eAAeknD,GAAS,CACrC,GAAIU,GAAO1tD,KAAK89C,MAAMkP,EACtB,IAAIU,EAAKC,WACHD,EAAKkG,MAAQlG,EAAKiG,SACpB/0C,EAAM8uC,EAAKrkC,GAAGtX,EAAI27C,EAAKtkC,KAAKrX,EAC5B8M,EAAM6uC,EAAKrkC,GAAGrX,EAAI07C,EAAKtkC,KAAKpX,EAC5BrM,EAAST,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAGrBmgF,EAATr5F,GAAoB,CAEtB,GAAIkE,GAAa6jD,EAAKtkC,KAClBi1E,EAAY3wC,EAAKrkC,EACjBqkC,GAAKrkC,GAAG5a,QAAQwuC,KAAOyQ,EAAKtkC,KAAK3a,QAAQwuC,OAC3CpzC,EAAa6jD,EAAKrkC,GAClBg1E,EAAY3wC,EAAKtkC,MAGiB,GAAhCi1E,EAAUxiC,mBACZ77D,KAAKi/F,cAAcp1F,EAAWw0F,GAAU,GAEA,GAAjCx0F,EAAWgyD,oBAClB77D,KAAKi/F,cAAcZ,EAAUx0F,GAAW,MAetDjK,EAAQm/F,qBAAuB,WAC7B,IAAK,GAAI54C,KAAUnmD,MAAKg9C,MAEtB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIk4C,GAAYr+F,KAAKg9C,MAAMmJ,EAG3B,IAAoC,GAAhCk4C,EAAUxiC,oBAA4D,GAAjCwiC,EAAUlvC,aAAaxpD,OAAa,CAC3E,GAAI+nD,GAAO2wC,EAAUlvC,aAAa,GAC9BtlD,EAAc6jD,EAAKkG,MAAQyqC,EAAUh+F,GAAML,KAAKg9C,MAAM0Q,EAAKiG,QAAU3zD,KAAKg9C,MAAM0Q,EAAKkG,KAGrFyqC,GAAUh+F,IAAMwJ,EAAWxJ,KACzBwJ,EAAW4E,QAAQwuC,KAAOohD,EAAU5vF,QAAQwuC,KAC9Cj9C,KAAKi/F,cAAcp1F,EAAWw0F,GAAU,GAGxCr+F,KAAKi/F,cAAcZ,EAAUx0F,GAAW,OAgBpDjK,EAAQs/F,4BAA8B,SAASp5C,GAG7C,IAAK,GAFDq5C,GAAoB,GACpBC,EAAwB,KACnB55F,EAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAC5C,GAA6BgB,SAAzBs/C,EAAKqJ,aAAa3pD,GAAkB,CACtC,GAAI65F,GAAY,IACZv5C,GAAKqJ,aAAa3pD,GAAGmuD,QAAU7N,EAAKzlD,GACtCg/F,EAAYv5C,EAAKqJ,aAAa3pD,GAAG4jB,KAE1B08B,EAAKqJ,aAAa3pD,GAAGouD,MAAQ9N,EAAKzlD,KACzCg/F,EAAYv5C,EAAKqJ,aAAa3pD,GAAG6jB,IAIlB,MAAbg2E,GAAqBF,EAAoBE,EAAU7iC,gBAAgB72D,SACrEw5F,EAAoBE,EAAU7iC,gBAAgB72D,OAC9Cy5F,EAAwBC,GAKb,MAAbA,GAAkD74F,SAA7BxG,KAAKg9C,MAAMqiD,EAAUh/F,KAC5CL,KAAKi/F,cAAcI,EAAWv5C,GAAM,IAYxClmD,EAAQs+F,mBAAqB,SAASt9D,EAAO0+D,GAE3C,IAAK,GAAIn5C,KAAUnmD,MAAKg9C,MAElBh9C,KAAKg9C,MAAMl3C,eAAeqgD,IAC5BnmD,KAAKu/F,oBAAoBv/F,KAAKg9C,MAAMmJ,GAAQvlB,EAAM0+D,IAcxD1/F,EAAQ2/F,oBAAsB,SAASC,EAAS5+D,EAAO0+D,EAAWG,GAKhE,GAJ6Bj5F,SAAzBi5F,IACFA,EAAuB,GAGpBD,EAAQ3jC,oBAAsB77D,KAAKqtE,cAA6B,GAAbiyB,GACrDE,EAAQ3jC,oBAAsB77D,KAAKqtE,cAA6B,GAAbiyB,EAAoB,CASxE,IAAK,GAPD1gF,GAAGC,EAAGlZ,EACNq5F,EAAYh/F,KAAK0hD,UAAUvC,WAAWK,qBAAqBx/C,KAAKid,MAChEyiF,GAAe,EAGfC,KACAC,EAAuBJ,EAAQrwC,aAAaxpD,OACvCimB,EAAI,EAAOg0E,EAAJh0E,EAA0BA,IACxC+zE,EAAa33F,KAAKw3F,EAAQrwC,aAAavjC,GAAGvrB,GAK5C,IAAa,GAATugC,EAEF,IADA8+D,GAAe,EACV9zE,EAAI,EAAOg0E,EAAJh0E,EAA0BA,IAAK,CACzC,GAAI8hC,GAAO1tD,KAAK89C,MAAM6hD,EAAa/zE,GACn
C,IAAaplB,SAATknD,GACEA,EAAKC,WACHD,EAAKkG,MAAQlG,EAAKiG,SACpB/0C,EAAM8uC,EAAKrkC,GAAGtX,EAAI27C,EAAKtkC,KAAKrX,EAC5B8M,EAAM6uC,EAAKrkC,GAAGrX,EAAI07C,EAAKtkC,KAAKpX,EAC5BrM,EAAST,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAErBmgF,EAATr5F,GAAoB,CACtB+5F,GAAe,CACf,QASZ,IAAM9+D,GAAS8+D,GAAiB9+D,EAE9B,IAAKhV,EAAI,EAAOg0E,EAAJh0E,EAA0BA,IAGpC,GAFA8hC,EAAO1tD,KAAK89C,MAAM6hD,EAAa/zE,IAElBplB,SAATknD,EAAoB,CACtB,GAAI2wC,GAAYr+F,KAAKg9C,MAAO0Q,EAAKiG,QAAU6rC,EAAQn/F,GAAMqtD,EAAKkG,KAAOlG,EAAKiG,OAErE0qC,GAAUlvC,aAAaxpD,QAAW3F,KAAKqtE,aAAeoyB,GACtDpB,EAAUh+F,IAAMm/F,EAAQn/F,IAC3BL,KAAKi/F,cAAcO,EAAQnB,EAAUz9D,MAkBjDhhC,EAAQq/F,cAAgB,SAASp1F,EAAYw0F,EAAWz9D,GAEtD/2B,EAAWyyD,eAAe+hC,EAAUh+F,IAAMg+F,CAG1C,KAAK,GAAI74F,GAAI,EAAGA,EAAI64F,EAAUlvC,aAAaxpD,OAAQH,IAAK,CACtD,GAAIkoD,GAAO2wC,EAAUlvC,aAAa3pD,EAC9BkoD,GAAKkG,MAAQ/pD,EAAWxJ,IAAMqtD,EAAKiG,QAAU9pD,EAAWxJ,GAC1DL,KAAK6/F,qBAAqBh2F,EAAWw0F,EAAU3wC,GAG/C1tD,KAAK8/F,sBAAsBj2F,EAAWw0F,EAAU3wC,GAIpD2wC,EAAUlvC,gBAGVnvD,KAAK+/F,8BAA8Bl2F,EAAWw0F,SAIvCr+F,MAAKg9C,MAAMqhD,EAAUh+F,GAG5B,IAAI2/F,GAAan2F,EAAW4E,QAAQwuC,IACpCohD,GAAUviC,eAAiB97D,KAAK87D,eAChCjyD,EAAW4E,QAAQwuC,MAAQohD,EAAU5vF,QAAQwuC,KAC7CpzC,EAAWwyD,aAAegiC,EAAUhiC,YACpCxyD,EAAW4E,QAAQ8uC,SAAWr4C,KAAK4G,IAAI9L,KAAK0hD,UAAUvC,WAAWS,YAAa5/C,KAAK0hD,UAAU1E,MAAMO,SAAWv9C,KAAK0hD,UAAUvC,WAAWQ,mBAAmB91C,EAAWwyD,aAGlKxyD,EAAW2yD,gBAAgB3yD,EAAW2yD,gBAAgB72D,OAAS,IAAM3F,KAAK87D,gBAC5EjyD,EAAW2yD,gBAAgBx0D,KAAKhI,KAAK87D,gBAMrCjyD,EAAWuyD,eAFA,GAATx7B,EAE0B,EAGA5gC,KAAKid,MAInCpT,EAAWs0D,iBAGXt0D,EAAWyyD,eAAe+hC,EAAUh+F,IAAI+7D,eAAiBvyD,EAAWuyD,eAGpEiiC,EAAUn+B,gBAGVr2D,EAAWs2D,eAAe6/B,GAG1BhgG,KAAK8kD,QAAS,GAUhBllD,EAAQs9F,oBAAsB,WAC5B,IAAK,GAAI13F,GAAI,EAAGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAAK,CAChD,GAAIsgD,GAAO9lD,KAAKg9C,MAAMh9C,KAAK8jD,YAAYt+C,GACvCsgD,GAAK+V,mBAAqB/V,EAAKqJ,aAAaxpD,MAG5C,IAAIs6F,GAAa,CACjB,IAAIn6C,EAAK+V,mBAAqB,EAC5B,IAAK,GAAIjwC,GAAI,EAAGA,EAAIk6B,EAAK+V,mBAAqB,EAAGjwC,IAG/C,IAAK,GAFDs0E,GAAWp6C,EAAKqJ,aAAavjC,GAAGgoC,KAChCusC,EAAar6C,EAAKqJ,aAAavjC,GAAG+nC,OAC7BysC,EAAIx0E,EAAE,EAAGw0E,EAAIt6C,EAAK+V,mBAAoBukC,KACxCt6C,EAAKqJ,aAAaixC,GAAGxsC,MAAQssC,GAAYp6C,EAAKqJ,aAAaixC,GAAGzsC,QAAUwsC,GACxEr6C,EAAKqJ,aAAaixC,GAAGzsC,QAAUusC,GAAYp6C,EAAKqJ,aAAaixC,GAAGxsC,MAAQusC,KAC3EF,GAAc,EAKtBn6C,GAAK+V,oBAAsBokC,IAa/BrgG,EAAQigG,qBAAuB,SAASh2F,EAAYw0F,EAAW3wC,GAEvD7jD,EAAW0yD,eAAez2D,eAAeu4F,EAAUh+F,MACvDwJ,EAAW0yD,eAAe8hC,EAAUh+F,QAGtCwJ,EAAW0yD,eAAe8hC,EAAUh+F,IAAI2H,KAAK0lD,SAGtC1tD,MAAK89C,MAAM4P,EAAKrtD,GAGvB,KAAK,GAAImF,GAAI,EAAGA,EAAIqE,EAAWslD,aAAaxpD,OAAQH,IAClD,GAAIqE,EAAWslD,aAAa3pD,GAAGnF,IAAMqtD,EAAKrtD,GAAI,CAC5CwJ,EAAWslD,aAAa/mD,OAAO5C,EAAE,EACjC,SAcN5F,EAAQkgG,sBAAwB,SAASj2F,EAAYw0F,EAAW3wC,GAE1DA,EAAKkG,MAAQlG,EAAKiG,OACpB3zD,KAAK6/F,qBAAqBh2F,EAAYw0F,EAAW3wC,IAG7CA,EAAKkG,MAAQyqC,EAAUh+F,IACzBqtD,EAAK0G,aAAapsD,KAAKq2F,EAAUh+F,IACjCqtD,EAAKrkC,GAAKxf,EACV6jD,EAAKkG,KAAO/pD,EAAWxJ,KAIvBqtD,EAAKyG,eAAensD,KAAKq2F,EAAUh+F,IACnCqtD,EAAKtkC,KAAOvf,EACZ6jD,EAAKiG,OAAS9pD,EAAWxJ,IAG3BL,KAAKqgG,oBAAoBx2F,EAAWw0F,EAAU3wC,KAalD9tD,EAAQmgG,8BAAgC,SAASl2F,EAAYw0F,GAE3D,IAAK,GAAI74F,GAAI,EAAGA,EAAIqE,EAAWslD,aAAaxpD,OAAQH,IAAK,CACvD,GAAIkoD,GAAO7jD,EAAWslD,aAAa3pD,EAE/BkoD,GAAKkG,MAAQlG,EAAKiG,QACpB3zD,KAAK6/F,qBAAqBh2F,EAAYw0F,EAAW3wC,KAcvD9tD,EAAQygG,oBAAsB,SAASx2F,EAAYw0F,EAAW3wC,GAGtD7jD,EAAWkxD,cAAcj1D,eAAeu4F,EAAUh+F,MACtDwJ,EAAWkxD,cAAcsjC,EAAUh+F,QAErCwJ,EAAWkxD,cAAcsjC,EAAUh+F,IAAI2H,KAAK0lD,GAG5C7jD,EAAWslD,aAAannD,KAAK0lD,IAY/B9tD,EAAQ6+F,wBAA0B,SAAS50F,EAAYw0F,GACrD,GAAIx0F,EAAWkxD,cAAcj1D,eAAeu4F,EAAUh+F,IAAK,CACzD,IAAK,GAAImF,GAAI,EAAGA,EAAIqE,EAAWkxD,cAAcsjC,EAAUh+F,IAAIsF,OAA
QH,IAAK,CACtE,GAAIkoD,GAAO7jD,EAAWkxD,cAAcsjC,EAAUh+F,IAAImF,EAC9CkoD,GAAKyG,eAAezG,EAAKyG,eAAexuD,OAAO,IAAM04F,EAAUh+F,IACjEqtD,EAAKyG,eAAe9Z,MACpBqT,EAAKiG,OAAS0qC,EAAUh+F,GACxBqtD,EAAKtkC,KAAOi1E,IAGZ3wC,EAAK0G,aAAa/Z,MAClBqT,EAAKkG,KAAOyqC,EAAUh+F,GACtBqtD,EAAKrkC,GAAKg1E,GAIZA,EAAUlvC,aAAannD,KAAK0lD,EAG5B,KAAK,GAAI9hC,GAAI,EAAGA,EAAI/hB,EAAWslD,aAAaxpD,OAAQimB,IAClD,GAAI/hB,EAAWslD,aAAavjC,GAAGvrB,IAAMqtD,EAAKrtD,GAAI,CAC5CwJ,EAAWslD,aAAa/mD,OAAOwjB,EAAE,EACjC,cAKC/hB,GAAWkxD,cAAcsjC,EAAUh+F,MAa9CT,EAAQ8+F,eAAiB,SAAS70F,GAChC,IAAK,GAAIrE,GAAI,EAAGA,EAAIqE,EAAWslD,aAAaxpD,OAAQH,IAAK,CACvD,GAAIkoD,GAAO7jD,EAAWslD,aAAa3pD,EAC/BqE,GAAWxJ,IAAMqtD,EAAKkG,MAAQ/pD,EAAWxJ,IAAMqtD,EAAKiG,QACtD9pD,EAAWslD,aAAa/mD,OAAO5C,EAAE,KAcvC5F,EAAQ4+F,uBAAyB,SAAS30F,EAAYw0F,GACpD,IAAK,GAAI74F,GAAI,EAAGA,EAAIqE,EAAW0yD,eAAe8hC,EAAUh+F,IAAIsF,OAAQH,IAAK,CACvE,GAAIkoD,GAAO7jD,EAAW0yD,eAAe8hC,EAAUh+F,IAAImF,EAGnDxF,MAAK89C,MAAM4P,EAAKrtD,IAAMqtD,EAGtB2wC,EAAUlvC,aAAannD,KAAK0lD,GAC5B7jD,EAAWslD,aAAannD,KAAK0lD,SAGxB7jD,GAAW0yD,eAAe8hC,EAAUh+F,KAa7CT,EAAQ4uD,aAAe,WACrB,GAAIrI,EAEJ,KAAKA,IAAUnmD,MAAKg9C,MAClB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIL,GAAO9lD,KAAKg9C,MAAMmJ,EAClBL,GAAKuW,YAAc,IACrBvW,EAAKr9B,MAAQ,IAAIzU,OAAO5P,OAAO0hD,EAAKuW,aAAa,MAMvD,IAAKlW,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GACM,GAApBL,EAAKuW,cAELvW,EAAKr9B,MADoBjiB,SAAvBs/C,EAAK2W,cACM3W,EAAK2W,cAGLr4D,OAAO0hD,EAAKzlD,OAuBnCT,EAAQ48F,uBAAyB,WAC/B,GAGIr2C,GAHAm6C,EAAW,EACXC,EAAW,IACXC,EAAe,CAInB,KAAKr6C,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5Bq6C,EAAexgG,KAAKg9C,MAAMmJ,GAAQqW,gBAAgB72D,OACnC66F,EAAXF,IAA0BA,EAAWE,GACrCD,EAAWC,IAAeD,EAAWC,GAI7C,IAAIF,EAAWC,EAAWvgG,KAAK0hD,UAAUvC,WAAWgB,uBAAwB,CAC1E,GAAIo9C,GAAgBv9F,KAAK8jD,YAAYn+C,OACjC86F,EAAcH,EAAWtgG,KAAK0hD,UAAUvC,WAAWgB,sBAEvD,KAAKgG,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,IACxBnmD,KAAKg9C,MAAMmJ,GAAQqW,gBAAgB72D,OAAS86F,GAC9CzgG,KAAKk/F,4BAA4Bl/F,KAAKg9C,MAAMmJ,GAIlDnmD,MAAKgnD,uBACLhnD,KAAKk9F,sBAEDl9F,KAAK8jD,YAAYn+C,QAAU43F,IAC7Bv9F,KAAK87D,gBAAkB,KAe7Bl8D,EAAQi9F,kBAAoB,SAAS/2C,GACnC,MACE5gD,MAAK2lB,IAAIi7B,EAAK/zC,EAAI/R,KAAKkkD,WAAWnyC,IAAM/R,KAAK0hD,UAAUvC,WAAWe,kBAAkBlgD,KAAKid,OAEzF/X,KAAK2lB,IAAIi7B,EAAK9zC,EAAIhS,KAAKkkD,WAAWlyC,IAAMhS,KAAK0hD,UAAUvC,WAAWe,kBAAkBlgD,KAAKid,OAU7Frd,EAAQ88F,gBAAkB,WACxB,IAAK,GAAIl3F,GAAI,EAAGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAAK,CAChD,GAAIsgD,GAAO9lD,KAAKg9C,MAAMh9C,KAAK8jD,YAAYt+C,GACvC,IAAoB,GAAfsgD,EAAKqF,QAAkC,GAAfrF,EAAKsF,OAAkB,CAClD,GAAI3/B,GAAS,EAASzrB,KAAK8jD,YAAYn+C,OAAST,KAAK4G,IAAI,IAAIg6C,EAAKr3C,QAAQwuC,MACtEmR,EAAQ,EAAIlpD,KAAKymB,GAAKzmB,KAAKE,QACZ,IAAf0gD,EAAKqF,SAAkBrF,EAAK/zC,EAAI0Z,EAASvmB,KAAKqZ,IAAI6vC,IACnC,GAAftI,EAAKsF,SAAkBtF,EAAK9zC,EAAIyZ,EAASvmB,KAAKkZ,IAAIgwC,IACtDpuD,KAAK6+F,uBAAuB/4C,MAYlClmD,EAAQq+F,YAAc,WAMpB,IAAK,GALDyC,GAAU,EACVC,EAAiB,EACjBC,EAAa,EACbC,EAAa,EAERr7F,EAAI,EAAGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAAK,CAEhD,GAAIsgD,GAAO9lD,KAAKg9C,MAAMh9C,KAAK8jD,YAAYt+C,GACnCsgD,GAAK+V,mBAAqBglC,IAC5BA,EAAa/6C,EAAK+V,oBAEpB6kC,GAAW56C,EAAK+V,mBAChB8kC,GAAkBz7F,KAAK4uB,IAAIgyB,EAAK+V,mBAAmB,GACnD+kC,GAAc,EAEhBF,GAAoBE,EACpBD,GAAkCC,CAElC,IAAIE,GAAWH,EAAiBz7F,KAAK4uB,IAAI4sE,EAAQ,GAE7CK,EAAoB77F,KAAKyqB,KAAKmxE,EAElC9gG,MAAKqtE,aAAenoE,KAAKC,MAAMu7F,EAAU,EAAEK,GAGvC/gG,KAAKqtE,aAAewzB,IACtB7gG,KAAKqtE,aAAewzB,IAexBjhG,EAAQo+F,sBAAwB,SAASgD,GACvChhG,KAAKqtE,aAAe,CACpB,IAAI4zB,GAAe/7F,KAAKC,MAAMnF,KAAK8jD,YAAYn+C,OAASq7F,EACxD,KAAK,GAAI76C,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,IACiB,GAAzCnmD,KAAKg9C,MAAMmJ,GAAQ0V,oBAA2B77D,KAAKg9C,MAAMmJ,GAAQg
J,aAAaxpD,QAAU,GACtFs7F,EAAe,IACjBjhG,KAAKu/F,oBAAoBv/F,KAAKg9C,MAAMmJ,IAAQ,GAAK,EAAK,GACtD86C,GAAgB,IAa1BrhG,EAAQm+F,kBAAoB,WAC1B,GAAImD,GAAS,EACTC,EAAQ,CACZ,KAAK,GAAIh7C,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KACiB,GAAzCnmD,KAAKg9C,MAAMmJ,GAAQ0V,oBAA2B77D,KAAKg9C,MAAMmJ,GAAQgJ,aAAaxpD,QAAU,IAC1Fu7F,GAAU,GAEZC,GAAS,EAGb,OAAOD,GAAOC,IAMZ,SAASthG,EAAQD,EAASM,GAE9B,GAAIS,GAAOT,EAAoB,GAC3BqD,EAAOrD,EAAoB,GAgB/BN,GAAQ+nD,iBAAmB,WACzB3nD,KAAKkvD,QAAgB,OAAElvD,KAAK88F,WAAW9/C,MAAQh9C,KAAKg9C,MACpDh9C,KAAKkvD,QAAgB,OAAElvD,KAAK88F,WAAWh/C,MAAQ99C,KAAK89C,MACpD99C,KAAKkvD,QAAgB,OAAElvD,KAAK88F,WAAWh5C,YAAc9jD,KAAK8jD,aAa5DlkD,EAAQwhG,gBAAkB,SAASC,EAAUC,GACxB96F,SAAf86F,GAA0C,UAAdA,EAC9BthG,KAAKuhG,sBAAsBF,GAG3BrhG,KAAKwhG,sBAAsBH,IAY/BzhG,EAAQ2hG,sBAAwB,SAASF,GACvCrhG,KAAK8jD,YAAc9jD,KAAKkvD,QAAgB,OAAEmyC,GAAuB,YACjErhG,KAAKg9C,MAAch9C,KAAKkvD,QAAgB,OAAEmyC,GAAiB,MAC3DrhG,KAAK89C,MAAc99C,KAAKkvD,QAAgB,OAAEmyC,GAAiB,OAU7DzhG,EAAQ6hG,uBAAyB,WAC/BzhG,KAAK8jD,YAAc9jD,KAAKkvD,QAAiB,QAAe,YACxDlvD,KAAKg9C,MAAch9C,KAAKkvD,QAAiB,QAAS,MAClDlvD,KAAK89C,MAAc99C,KAAKkvD,QAAiB,QAAS,OAWpDtvD,EAAQ4hG,sBAAwB,SAASH,GACvCrhG,KAAK8jD,YAAc9jD,KAAKkvD,QAAgB,OAAEmyC,GAAuB,YACjErhG,KAAKg9C,MAAch9C,KAAKkvD,QAAgB,OAAEmyC,GAAiB,MAC3DrhG,KAAK89C,MAAc99C,KAAKkvD,QAAgB,OAAEmyC,GAAiB,OAU7DzhG,EAAQ8hG,kBAAoB,WAC1B1hG,KAAKohG,gBAAgBphG,KAAK88F,YAU5Bl9F,EAAQk9F,QAAU,WAChB,MAAO98F,MAAKstE,aAAattE,KAAKstE,aAAa3nE,OAAO,IAUpD/F,EAAQ+hG,gBAAkB,WACxB,GAAI3hG,KAAKstE,aAAa3nE,OAAS,EAC7B,MAAO3F,MAAKstE,aAAattE,KAAKstE,aAAa3nE,OAAO,EAGlD,MAAM,IAAIU,WAAU,iEAaxBzG,EAAQgiG,iBAAmB,SAASC,GAClC7hG,KAAKstE,aAAatlE,KAAK65F,IAUzBjiG,EAAQkiG,kBAAoB,WAC1B9hG,KAAKstE,aAAajzB,OAWpBz6C,EAAQmiG,iBAAmB,SAASF,GAElC7hG,KAAKkvD,QAAgB,OAAE2yC,IAAU7kD,SACAc,SACAgG,eACAsY,eAAkBp8D,KAAKid,MACvBswD,YAAe/mE,QAGhDxG,KAAKkvD,QAAgB,OAAE2yC,GAAoB,YAAI,GAAIt+F,IAC9ClD,GAAGwhG,EACF12F,OACEgB,WAAY,UACZC,OAAQ,iBAEJpM,KAAK0hD,WACjB1hD,KAAKkvD,QAAgB,OAAE2yC,GAAoB,YAAExlC,YAAc,GAW7Dz8D,EAAQoiG,oBAAsB,SAASX,SAC9BrhG,MAAKkvD,QAAgB,OAAEmyC,IAWhCzhG,EAAQqiG,oBAAsB,SAASZ,SAC9BrhG,MAAKkvD,QAAgB,OAAEmyC,IAWhCzhG,EAAQsiG,cAAgB,SAASb,GAE/BrhG,KAAKkvD,QAAgB,OAAEmyC,GAAYrhG,KAAKkvD,QAAgB,OAAEmyC,GAG1DrhG,KAAKgiG,oBAAoBX,IAW3BzhG,EAAQuiG,gBAAkB,SAASd,GAEjCrhG,KAAKkvD,QAAgB,OAAEmyC,GAAYrhG,KAAKkvD,QAAgB,OAAEmyC,GAG1DrhG,KAAKiiG,oBAAoBZ,IAa3BzhG,EAAQwiG,qBAAuB,SAASf,GAEtC,IAAK,GAAIl7C,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BnmD,KAAKkvD,QAAgB,OAAEmyC,GAAiB,MAAEl7C,GAAUnmD,KAAKg9C,MAAMmJ,GAKnE,KAAK,GAAI6G,KAAUhtD,MAAK89C,MAClB99C,KAAK89C,MAAMh4C,eAAeknD,KAC5BhtD,KAAKkvD,QAAgB,OAAEmyC,GAAiB,MAAEr0C,GAAUhtD,KAAK89C,MAAMkP,GAKnE,KAAK,GAAIxnD,GAAI,EAAGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAC3CxF,KAAKkvD,QAAgB,OAAEmyC,GAAuB,YAAEr5F,KAAKhI,KAAK8jD,YAAYt+C,KAW1E5F,EAAQyiG,6BAA+B,WACrCriG,KAAKm8F,aAAa,GAAE,IAUtBv8F,EAAQm9F,WAAa,SAASj3C,GAE5B,GAAIw8C,GAAStiG,KAAK88F,gBAWX98F,MAAKg9C,MAAM8I,EAAKzlD,GAEvB,IAAIkiG,GAAmB5hG,EAAKqE,YAG5BhF,MAAKkiG,cAAcI,GAGnBtiG,KAAK+hG,iBAAiBQ,GAGtBviG,KAAK4hG,iBAAiBW,GAGtBviG,KAAKohG,gBAAgBphG,KAAK88F,WAG1B98F,KAAKg9C,MAAM8I,EAAKzlD,IAAMylD,GAUxBlmD,EAAQ49F,gBAAkB,WAExB,GAAI8E,GAAStiG,KAAK88F,SAGlB,IAAc,WAAVwF,IAC8B,GAA3BtiG,KAAK8jD,YAAYn+C,QACpB3F,KAAKkvD,QAAgB,OAAEozC,GAAqB,YAAE/vF,MAAMvS,KAAKid,MAAQjd,KAAK0hD,UAAUvC,WAAWO,oBAAsB1/C,KAAKsf,MAAMC,OAAOC,aACnIxf,KAAKkvD,QAAgB,OAAEozC,GAAqB,YAAE9vF,OAAOxS,KAAKid,MAAQjd,KAAK0hD,UAAUvC,WAAWO,oBAAsB1/C,KAAKsf,MAAMC,OAAOsF,cAAe,CACnJ,GAAI29E,GAAiBxiG,KAAK2hG,iBAG1B3hG,MAAKqiG,+BAILriG,KAAKoiG,qBAAqBI,GAI1BxiG,KAAKgiG,oBAAoBM,GAGzBtiG,KAAKmiG,gBAAgBK,GAGrBxiG,KAAKohG,gBAAgBoB,GAGrBxiG,KAAK8hG,oBAGL9hG,KAAKgnD,
uBAGLhnD,KAAKquD,4BAeXzuD,EAAQsxD,sBAAwB,SAASuxC,EAAYC,GACnD,GAAIC,KACJ,IAAiBn8F,SAAbk8F,EACF,IAAK,GAAIJ,KAAUtiG,MAAKkvD,QAAgB,OAClClvD,KAAKkvD,QAAgB,OAAEppD,eAAew8F,KAExCtiG,KAAKuhG,sBAAsBe,GAC3BK,EAAa36F,KAAMhI,KAAKyiG,WAK5B,KAAK,GAAIH,KAAUtiG,MAAKkvD,QAAgB,OACtC,GAAIlvD,KAAKkvD,QAAgB,OAAEppD,eAAew8F,GAAS,CAEjDtiG,KAAKuhG,sBAAsBe,EAC3B,IAAIrpF,GAAOhT,MAAMkN,UAAU/K,OAAO7H,KAAKmF,UAAW,EAEhDi9F,GAAa36F,KADXiR,EAAKtT,OAAS,EACG3F,KAAKyiG,GAAaxpF,EAAK,GAAGA,EAAK,IAG/BjZ,KAAKyiG,GAAaC,IAO7C,MADA1iG,MAAK0hG,oBACEiB,GAaT/iG,EAAQuxD,mBAAqB,SAASsxC,EAAYC,GAChD,GAAIC,IAAe,CACnB,IAAiBn8F,SAAbk8F,EACF1iG,KAAKyhG,yBACLkB,EAAe3iG,KAAKyiG,SAEjB,CACHziG,KAAKyhG,wBACL,IAAIxoF,GAAOhT,MAAMkN,UAAU/K,OAAO7H,KAAKmF,UAAW,EAEhDi9F,GADE1pF,EAAKtT,OAAS,EACD3F,KAAKyiG,GAAaxpF,EAAK,GAAGA,EAAK,IAG/BjZ,KAAKyiG,GAAaC,GAKrC,MADA1iG,MAAK0hG,oBACEiB,GAaT/iG,EAAQgjG,sBAAwB,SAASH,EAAYC,GACnD,GAAiBl8F,SAAbk8F,EACF,IAAK,GAAIJ,KAAUtiG,MAAKkvD,QAAgB,OAClClvD,KAAKkvD,QAAgB,OAAEppD,eAAew8F,KAExCtiG,KAAKwhG,sBAAsBc,GAC3BtiG,KAAKyiG,UAKT,KAAK,GAAIH,KAAUtiG,MAAKkvD,QAAgB,OACtC,GAAIlvD,KAAKkvD,QAAgB,OAAEppD,eAAew8F,GAAS,CAEjDtiG,KAAKwhG,sBAAsBc,EAC3B,IAAIrpF,GAAOhT,MAAMkN,UAAU/K,OAAO7H,KAAKmF,UAAW,EAC9CuT,GAAKtT,OAAS,EAChB3F,KAAKyiG,GAAaxpF,EAAK,GAAGA,EAAK,IAG/BjZ,KAAKyiG,GAAaC,GAK1B1iG,KAAK0hG,qBAaP9hG,EAAQ4vD,gBAAkB,SAASizC,EAAYC,GAC7C,GAAIzpF,GAAOhT,MAAMkN,UAAU/K,OAAO7H,KAAKmF,UAAW,EACjCc,UAAbk8F,GACF1iG,KAAKkxD,sBAAsBuxC,GAC3BziG,KAAK4iG,sBAAsBH,IAGvBxpF,EAAKtT,OAAS,GAChB3F,KAAKkxD,sBAAsBuxC,EAAYxpF,EAAK,GAAGA,EAAK,IACpDjZ,KAAK4iG,sBAAsBH,EAAYxpF,EAAK,GAAGA,EAAK,MAGpDjZ,KAAKkxD,sBAAsBuxC,EAAYC,GACvC1iG,KAAK4iG,sBAAsBH,EAAYC,KAY7C9iG,EAAQqnD,oBAAsB,WAC5B,GAAIq7C,GAAStiG,KAAK88F,SAClB98F,MAAKkvD,QAAgB,OAAEozC,GAAqB,eAC5CtiG,KAAK8jD,YAAc9jD,KAAKkvD,QAAgB,OAAEozC,GAAqB,aAWjE1iG,EAAQijG,iBAAmB,SAAS97E,EAAIu6E,GACtC,GAAsDx7C,GAAlDC,EAAO,IAAKC,EAAO,KAAMC,EAAO,IAAKC,EAAO,IAChD,KAAK,GAAIo8C,KAAUtiG,MAAKkvD,QAAQoyC,GAC9B,GAAIthG,KAAKkvD,QAAQoyC,GAAYx7F,eAAew8F,IACc97F,SAApDxG,KAAKkvD,QAAQoyC,GAAYgB,GAAqB,YAAiB,CAEjEtiG,KAAKohG,gBAAgBkB,EAAOhB,GAE5Bv7C,EAAO,IAAKC,EAAO,KAAMC,EAAO,IAAKC,EAAO,IAC5C,KAAK,GAAIC,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GAClBL,EAAKkQ,OAAOjvC,GACRk/B,EAAOH,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,QAAQ0zC,EAAOH,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,OAC9D2zC,EAAOJ,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,QAAQ2zC,EAAOJ,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,OAC9DwzC,EAAOD,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,SAASuzC,EAAOD,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,QAC/DwzC,EAAOF,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,SAASwzC,EAAOF,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,QAGvEszC,GAAO9lD,KAAKkvD,QAAQoyC,GAAYgB,GAAqB,YACrDx8C,EAAK/zC,EAAI,IAAOm0C,EAAOD,GACvBH,EAAK9zC,EAAI,IAAOg0C,EAAOD,GACvBD,EAAKvzC,MAAQ,GAAKuzC,EAAK/zC,EAAIk0C,GAC3BH,EAAKtzC,OAAS,GAAKszC,EAAK9zC,EAAI+zC,GAC5BD,EAAKr3C,QAAQgd,OAASvmB,KAAKyqB,KAAKzqB,KAAK4uB,IAAI,GAAIgyB,EAAKvzC,MAAM,GAAKrN,KAAK4uB,IAAI,GAAIgyB,EAAKtzC,OAAO,IACtFszC,EAAK9iB,SAAShjC,KAAKid,OACnB6oC,EAAKoX,YAAYn2C,KAMzBnnB,EAAQkjG,oBAAsB,SAAS/7E,GACrC/mB,KAAK6iG,iBAAiB97E,EAAI,UAC1B/mB,KAAK6iG,iBAAiB97E,EAAI,UAC1B/mB,KAAK0hG,sBAMH,SAAS7hG,EAAQD,EAASM,GAE9B,GAAIqD,GAAOrD,EAAoB,GAS/BN,GAAQmjG,yBAA2B,SAAS9+F,EAAQqpD,GAClD,GAAItQ,GAAQh9C,KAAKg9C,KACjB,KAAK,GAAImJ,KAAUnJ,GACbA,EAAMl3C,eAAeqgD,IACnBnJ,EAAMmJ,GAAQoH,kBAAkBtpD,IAClCqpD,EAAiBtlD,KAAKm+C,IAY9BvmD,EAAQojG,4BAA8B,SAAU/+F,GAC9C,GAAIqpD,KAEJ,OADAttD,MAAKkxD,sBAAsB,2BAA2BjtD,EAAOqpD,GACtDA,GAWT1tD,EAAQqjG,yBAA2B,SAASpjE,GAC1C,GAAI9tB,GAAI/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GACtCC,EAAIhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,EAE1C,QACEvK,KAAQsK,EACRpK,IAAQqK,EACRqV,MAAQtV,EACRuR,OAAQtR,IAYZpS,EAAQgrD,WAAa
,SAAU/qB,GAE7B,GAAIqjE,GAAiBljG,KAAKijG,yBAAyBpjE,GAC/CytB,EAAmBttD,KAAKgjG,4BAA4BE,EAIxD,OAAI51C,GAAiB3nD,OAAS,EACpB3F,KAAKg9C,MAAMsQ,EAAiBA,EAAiB3nD,OAAS,IAGvD,MAWX/F,EAAQujG,yBAA2B,SAAUl/F,EAAQwpD,GACnD,GAAI3P,GAAQ99C,KAAK89C,KACjB,KAAK,GAAIkP,KAAUlP,GACbA,EAAMh4C,eAAeknD,IACnBlP,EAAMkP,GAAQO,kBAAkBtpD,IAClCwpD,EAAiBzlD,KAAKglD,IAa9BptD,EAAQwjG,4BAA8B,SAAUn/F,GAC9C,GAAIwpD,KAEJ,OADAztD,MAAKkxD,sBAAsB,2BAA2BjtD,EAAOwpD,GACtDA,GAWT7tD,EAAQqtD,WAAa,SAASptB,GAC5B,GAAIqjE,GAAiBljG,KAAKijG,yBAAyBpjE,GAC/C4tB,EAAmBztD,KAAKojG,4BAA4BF,EAExD,OAAIz1C,GAAiB9nD,OAAS,EACrB3F,KAAK89C,MAAM2P,EAAiBA,EAAiB9nD,OAAS,IAGtD,MAWX/F,EAAQyjG,gBAAkB,SAAStgF,GAC7BA,YAAexf,GACjBvD,KAAKkrD,aAAalO,MAAMj6B,EAAI1iB,IAAM0iB,EAGlC/iB,KAAKkrD,aAAapN,MAAM/6B,EAAI1iB,IAAM0iB,GAUtCnjB,EAAQ0jG,YAAc,SAASvgF,GACzBA,YAAexf,GACjBvD,KAAK4hD,SAAS5E,MAAMj6B,EAAI1iB,IAAM0iB,EAG9B/iB,KAAK4hD,SAAS9D,MAAM/6B,EAAI1iB,IAAM0iB,GAWlCnjB,EAAQ2jG,qBAAuB,SAASxgF,GAClCA,YAAexf,SACVvD,MAAKkrD,aAAalO,MAAMj6B,EAAI1iB,UAG5BL,MAAKkrD,aAAapN,MAAM/6B,EAAI1iB,KAUvCT,EAAQ2+F,aAAe,SAASiF,GACTh9F,SAAjBg9F,IACFA,GAAe,EAEjB,KAAI,GAAIr9C,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,IACxCnmD,KAAKkrD,aAAalO,MAAMmJ,GAAQjV,UAGpC,KAAI,GAAI8b,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,IACxChtD,KAAKkrD,aAAapN,MAAMkP,GAAQ9b,UAIpClxC,MAAKkrD,cAAgBlO,SAASc,UAEV,GAAhB0lD,GACFxjG,KAAK4tB,KAAK,SAAU5tB,KAAK42B,iBAU7Bh3B,EAAQ6jG,kBAAoB,SAASD,GACdh9F,SAAjBg9F,IACFA,GAAe,EAGjB,KAAK,GAAIr9C,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,IACrCnmD,KAAKkrD,aAAalO,MAAMmJ,GAAQkW,YAAc,IAChDr8D,KAAKkrD,aAAalO,MAAMmJ,GAAQjV,WAChClxC,KAAKujG,qBAAqBvjG,KAAKkrD,aAAalO,MAAMmJ,IAKpC,IAAhBq9C,GACFxjG,KAAK4tB,KAAK,SAAU5tB,KAAK42B,iBAW7Bh3B,EAAQ8jG,sBAAwB,WAC9B,GAAI1sF,GAAQ,CACZ,KAAK,GAAImvC,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,KACzCnvC,GAAS,EAGb,OAAOA,IASTpX,EAAQ+jG,iBAAmB,WACzB,IAAK,GAAIx9C,KAAUnmD,MAAKkrD,aAAalO,MACnC,GAAIh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,GACzC,MAAOnmD,MAAKkrD,aAAalO,MAAMmJ,EAGnC,OAAO,OASTvmD,EAAQgkG,iBAAmB,WACzB,IAAK,GAAI52C,KAAUhtD,MAAKkrD,aAAapN,MACnC,GAAI99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,GACzC,MAAOhtD,MAAKkrD,aAAapN,MAAMkP,EAGnC,OAAO,OAUTptD,EAAQikG,sBAAwB,WAC9B,GAAI7sF,GAAQ,CACZ,KAAK,GAAIg2C,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,KACzCh2C,GAAS,EAGb,OAAOA,IAUTpX,EAAQkkG,wBAA0B,WAChC,GAAI9sF,GAAQ,CACZ,KAAI,GAAImvC,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,KACxCnvC,GAAS,EAGb,KAAI,GAAIg2C,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,KACxCh2C,GAAS,EAGb,OAAOA,IASTpX,EAAQmkG,kBAAoB,WAC1B,IAAI,GAAI59C,KAAUnmD,MAAKkrD,aAAalO,MAClC,GAAGh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,GACxC,OAAO,CAGX,KAAI,GAAI6G,KAAUhtD,MAAKkrD,aAAapN,MAClC,GAAG99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,GACxC,OAAO,CAGX,QAAO,GAUTptD,EAAQokG,oBAAsB,WAC5B,IAAI,GAAI79C,KAAUnmD,MAAKkrD,aAAalO,MAClC,GAAGh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,IACpCnmD,KAAKkrD,aAAalO,MAAMmJ,GAAQkW,YAAc,EAChD,OAAO,CAIb,QAAO,GASTz8D,EAAQqkG,sBAAwB,SAASn+C,GACvC,IAAK,GAAItgD,GAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAAK,CACjD,GAAIkoD,GAAO5H,EAAKqJ,aAAa3pD,EAC7BkoD,GAAKvc,SACLnxC,KAAKqjG,gBAAgB31C,KAUzB9tD,EAAQskG,qBAAuB,SAASp+C,GACtC,IAAK,GAAItgD,GAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAAK,CACjD,GAAIkoD,GAAO5H,EAAKqJ,aAAa3pD,EAC7BkoD,GAAKphD,OAAQ,EACbtM,KAAKsjG,YAAY51C,KAWrB9tD,EAAQukG,wBAA0B,SAASr+C,GACzC,IAAK,GAAItgD,GAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAAK,CACjD,GAAIkoD,GAAO5H,EAAKqJ,aAAa3pD,EAC7BkoD,GAAKxc,WACLlxC,KAAKujG,qBAAqB71C,KAgB9B9tD,EAAQmrD,cAAgB,SAAS9mD,EAAQmgG,EAAQZ,EAAca,EAAgBC,GACxD9
9F,SAAjBg9F,IACFA,GAAe,GAEMh9F,SAAnB69F,IACFA,GAAiB,GAGa,GAA5BrkG,KAAK+jG,qBAA0C,GAAVK,GAAgD,GAA7BpkG,KAAKytE,sBAC/DztE,KAAKu+F,cAAa,GAIG,GAAnBt6F,EAAOovC,UAAmD,GAA7BrzC,KAAK0hD,UAAUzS,aAAsBq1D,EAQ1C,GAAnBrgG,EAAOovC,UACdrzC,KAAKqjG,gBAAgBp/F,GACrBu/F,GAAe,IAGfv/F,EAAOitC,WACPlxC,KAAKujG,qBAAqBt/F,KAb1BA,EAAOktC,SACPnxC,KAAKqjG,gBAAgBp/F,GACjBA,YAAkBV,IAA6C,GAArCvD,KAAKwtE,8BAA2D,GAAlB62B,GAC1ErkG,KAAKikG,sBAAsBhgG,IAaX,GAAhBu/F,GACFxjG,KAAK4tB,KAAK,SAAU5tB,KAAK42B,iBAY7Bh3B,EAAQutD,YAAc,SAASlpD,GACT,GAAhBA,EAAOqI,QACTrI,EAAOqI,OAAQ,EACftM,KAAK4tB,KAAK,YAAYk4B,KAAK7hD,EAAO5D,OAWtCT,EAAQstD,aAAe,SAASjpD,GACV,GAAhBA,EAAOqI,QACTrI,EAAOqI,OAAQ,EACftM,KAAKsjG,YAAYr/F,GACbA,YAAkBV,IACpBvD,KAAK4tB,KAAK,aAAak4B,KAAK7hD,EAAO5D,MAGnC4D,YAAkBV,IACpBvD,KAAKkkG,qBAAqBjgG,IAa9BrE,EAAQ8qD,aAAe,aAUvB9qD,EAAQgsD,WAAa,SAAS/rB,GAC5B,GAAIimB,GAAO9lD,KAAK4qD,WAAW/qB,EAC3B,IAAY,MAARimB,EACF9lD,KAAK+qD,cAAcjF,GAAM,OAEtB,CACH,GAAI4H,GAAO1tD,KAAKitD,WAAWptB,EACf,OAAR6tB,EACF1tD,KAAK+qD,cAAc2C,GAAM,GAGzB1tD,KAAKu+F,eAGT,GAAI5vC,GAAa3uD,KAAK42B,cACtB+3B,GAAoB,SAClB41C,KAAMxyF,EAAG8tB,EAAQ9tB,EAAGC,EAAG6tB,EAAQ7tB,GAC/BuN,QAASxN,EAAG/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GAAIC,EAAGhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,KAEzFhS,KAAK4tB,KAAK,QAAS+gC,GACnB3uD,KAAK6iD,WAUPjjD,EAAQisD,iBAAmB,SAAShsB,GAClC,GAAIimB,GAAO9lD,KAAK4qD,WAAW/qB,EACf,OAARimB,GAAyBt/C,SAATs/C,IAElB9lD,KAAKkkD,YAAenyC,EAAM/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GACxCC,EAAMhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAC5DhS,KAAK28F,YAAY72C,GAEnB,IAAI6I,GAAa3uD,KAAK42B,cACtB+3B,GAAoB,SAClB41C,KAAMxyF,EAAG8tB,EAAQ9tB,EAAGC,EAAG6tB,EAAQ7tB,GAC/BuN,QAASxN,EAAG/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GAAIC,EAAGhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,KAEzFhS,KAAK4tB,KAAK,cAAe+gC,IAU3B/uD,EAAQksD,cAAgB,SAASjsB,GAC/B,GAAIimB,GAAO9lD,KAAK4qD,WAAW/qB,EAC3B,IAAY,MAARimB,EACF9lD,KAAK+qD,cAAcjF,GAAK,OAErB,CACH,GAAI4H,GAAO1tD,KAAKitD,WAAWptB,EACf,OAAR6tB,GACF1tD,KAAK+qD,cAAc2C,GAAK,GAG5B1tD,KAAK6iD,WAUPjjD,EAAQosD,iBAAmB,SAASnsB,GAClC7/B,KAAKwkG,6BAA6B3kE,GAClC7/B,KAAKykG,2BAA2B5kE,IAGlCjgC,EAAQ4kG,6BAA+B,aACvC5kG,EAAQ6kG,2BAA6B,aAOrC7kG,EAAQg3B,aAAe,WACrB,GAAIo0B,GAAUhrD,KAAK0kG,mBACfC,EAAU3kG,KAAK4kG,kBACnB,QAAQ5nD,MAAMgO,EAASlN,MAAM6mD,IAS/B/kG,EAAQ8kG,iBAAmB,WACzB,GAAIG,KACJ,IAAiC,GAA7B7kG,KAAK0hD,UAAUzS,WACjB,IAAK,GAAIkX,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,IACzC0+C,EAAQ78F,KAAKm+C,EAInB,OAAO0+C,IASTjlG,EAAQglG,iBAAmB,WACzB,GAAIC,KACJ,IAAiC,GAA7B7kG,KAAK0hD,UAAUzS,WACjB,IAAK,GAAI+d,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,IACzC63C,EAAQ78F,KAAKglD,EAInB,OAAO63C,IASTjlG,EAAQ82B,aAAe,WACrBiC,QAAQhF,IAAI,gEAUd/zB,EAAQklG,YAAc,SAAS30D,EAAWk0D,GACxC,GAAI7+F,GAAGq7B,EAAMxgC,CAEb,KAAK8vC,GAAkC3pC,QAApB2pC,EAAUxqC,OAC3B,KAAM,qCAKR,KAFA3F,KAAKu+F,cAAa,GAEb/4F,EAAI,EAAGq7B,EAAOsP,EAAUxqC,OAAYk7B,EAAJr7B,EAAUA,IAAK,CAClDnF,EAAK8vC,EAAU3qC,EAEf,IAAIsgD,GAAO9lD,KAAKg9C,MAAM38C,EACtB,KAAKylD,EACH,KAAM,IAAIi/C,YAAW,iBAAmB1kG,EAAK,cAE/CL,MAAK+qD,cAAcjF,GAAK,GAAK,EAAKu+C,GAAe,GAEnDrkG,KAAKyhB,UASP7hB,EAAQolG,YAAc,SAAS70D,GAC7B,GAAI3qC,GAAGq7B,EAAMxgC,CAEb,KAAK8vC,GAAkC3pC,QAApB2pC,EAAUxqC,OAC3B,KAAM,qCAKR,KAFA3F,KAAKu+F,cAAa,GAEb/4F,EAAI,EAAGq7B,EAAOsP,EAAUxqC,OAAYk7B,EAAJr7B,EAAUA,IAAK,CAClDnF,EAAK8vC,EAAU3qC,EAEf,IAAIkoD,GAAO1tD,KAAK89C,MAAMz9C,EACtB,KAAKqtD,EACH,KAAM,IAAIq3C,YAAW,iBAAmB1kG,EAAK,cAE/CL,MAAK+qD,cAAc2C,GAAK,GAAK,GAAK,GAAM,GAE1C1tD,KAAKyhB,UAOP7hB,EAAQuuD,iBAAmB,WACzB,IAAI,GAAIhI,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,KACnCnmD,KAAKg9C,MAAMl3C,eAAeqgD,UACtBnmD,MAAKkrD,aAAalO,MAAMmJ,GAIrC,KAAI,GAAI6G,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAekn
D,KACnChtD,KAAK89C,MAAMh4C,eAAeknD,UACtBhtD,MAAKkrD,aAAapN,MAAMkP,MASnC,SAASntD,EAAQD,EAASM,GAE9B,GAAIS,GAAOT,EAAoB,GAC3BqD,EAAOrD,EAAoB,IAC3BkD,EAAOlD,EAAoB,GAO/BN,GAAQqlG,qBAAuB,WAC7BjlG,KAAKqqD,oBAAoBrqD,KAAK0tE,iBAC9B1tE,KAAKklG,mBAELllG,KAAKwkG,6BAA+B,mBAC7BxkG,MAAKkvD,QAAiB,QAAS,MAAc,iBAC7ClvD,MAAKkvD,QAAiB,QAAS,MAAiB,cACvDlvD,KAAK6hD,oBAAqB,EAC1B7hD,KAAKujD,kBAAmB,GAU1B3jD,EAAQulG,4BAA8B,WACpC,IAAK,GAAIC,KAAgBplG,MAAKwjD,gBACxBxjD,KAAKwjD,gBAAgB19C,eAAes/F,KACtCplG,KAAKolG,GAAgBplG,KAAKwjD,gBAAgB4hD,SACnCplG,MAAKwjD,gBAAgB4hD,KAUlCxlG,EAAQylG,gBAAkB,WACxBrlG,KAAKkoD,UAAYloD,KAAKkoD,QACtB,IAAIo9C,GAAUtlG,KAAK0tE,gBACfE,EAAW5tE,KAAK4tE,SAChBD,EAAc3tE,KAAK2tE,WACF,IAAjB3tE,KAAKkoD,UACPo9C,EAAQr4F,MAAM26B,QAAQ,QACtBgmC,EAAS3gE,MAAM26B,QAAQ,QACvB+lC,EAAY1gE,MAAM26B,QAAQ,OAC1BgmC,EAAS57C,QAAUhyB,KAAKqlG,gBAAgBvwE,KAAK90B,QAG7CslG,EAAQr4F,MAAM26B,QAAQ,OACtBgmC,EAAS3gE,MAAM26B,QAAQ,OACvB+lC,EAAY1gE,MAAM26B,QAAQ,QAC1BgmC,EAAS57C,QAAU,MAErBhyB,KAAKmnD,yBAQPvnD,EAAQunD,sBAAwB,WAE1BnnD,KAAKulG,eACPvlG,KAAK0T,IAAI,SAAU1T,KAAKulG,cAG1B,IAAIlhE,GAASrkC,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,OAqBnD,IAnB6B79B,SAAzBxG,KAAKwlG,kBACPxlG,KAAKwlG,gBAAgB/rC,uBACrBz5D,KAAKwlG,gBAAkBh/F,OACvBxG,KAAKylG,oBAAsB,KAC3BzlG,KAAK6hD,oBAAqB,EAC1B7hD,KAAK6iD,WAIP7iD,KAAKmlG,8BAGLnlG,KAAKujD,kBAAmB,EAGxBvjD,KAAKwtE,8BAA+B,EACpCxtE,KAAKytE,sBAAuB,EAC5BztE,KAAKklG,mBAEgB,GAAjBllG,KAAKkoD,SAAkB,CACzB,KAAOloD,KAAK0tE,gBAAgBhqD,iBAC1B1jB,KAAK0tE,gBAAgBv8D,YAAYnR,KAAK0tE,gBAAgB/pD,WAGxD3jB,MAAKklG,gBAA6B,YAAI3zF,SAASM,cAAc,QAC7D7R,KAAKklG,gBAA6B,YAAEr9F,UAAY,6BAChD7H,KAAKklG,gBAAkC,iBAAI3zF,SAASM,cAAc,QAClE7R,KAAKklG,gBAAkC,iBAAEr9F,UAAY,4BACrD7H,KAAKklG,gBAAkC,iBAAEjhF,UAAYogB,EAAgB,QACrErkC,KAAKklG,gBAA6B,YAAEzzF,YAAYzR,KAAKklG,gBAAkC,kBAEvFllG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAA6B,YAAI3zF,SAASM,cAAc,QAC7D7R,KAAKklG,gBAA6B,YAAEr9F,UAAY,iCAChD7H,KAAKklG,gBAAkC,iBAAI3zF,SAASM,cAAc,QAClE7R,KAAKklG,gBAAkC,iBAAEr9F,UAAY,4BACrD7H,KAAKklG,gBAAkC,iBAAEjhF,UAAYogB,EAAgB,QACrErkC,KAAKklG,gBAA6B,YAAEzzF,YAAYzR,KAAKklG,gBAAkC,kBAEvFllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA6B,aACnEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA6B,aAE/B,GAAhCllG,KAAK0jG,yBAAgC1jG,KAAK28C,iBAAiBC,MAC7D58C,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAA8B,aAAI3zF,SAASM,cAAc,QAC9D7R,KAAKklG,gBAA8B,aAAEr9F,UAAY,8BACjD7H,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,QACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,4BACtD7H,KAAKklG,gBAAmC,kBAAEjhF,UAAYogB,EAAiB,SACvErkC,KAAKklG,gBAA8B,aAAEzzF,YAAYzR,KAAKklG,gBAAmC,mBAEzFllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA8B,eAE7B,GAAhCllG,KAAK6jG,yBAAgE,GAAhC7jG,KAAK0jG,0BACjD1jG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAA8B,aAAI3zF,SAASM,cAAc,QAC9D7R,KAAKklG,gBAA8B,aAAEr9F,UAAY,8BACjD7H,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,QACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,4BACtD7H,KAAKklG,gBAAmC,kBAAEjhF,UAAYogB,EAAiB,SACvErkC,KAAKklG,gBAA8B,aAAEzzF,YAAYzR,KAAKklG,gBAAmC,mBAEzFllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA8B,eAEtC,GAA5BllG,KAAK+jG,sBACP/jG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAA4B,WAAI3zF,SAASM,cAAc,QAC5D7R,KAAKklG,gBAA4B,WAAEr9F,UAAY,gCAC/C7H,KAAKklG,gBAAiC,gBAAI3zF,SAASM,cAAc,QACjE7R,KAAKklG,gBAAiC,gBAAEr9F,UAAY,4BACpD7H,KAAKklG,gBAAiC,gBAAEjhF,UAAYogB,EAAY,IAChErkC,KAAKklG,gBAA4B,WAAEzzF,YAAYzR,K
AAKklG,gBAAiC,iBAErFllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA4B,aAKpEllG,KAAKklG,gBAA6B,YAAElzE,QAAUhyB,KAAK0lG,sBAAsB5wE,KAAK90B,MAC9EA,KAAKklG,gBAA6B,YAAElzE,QAAUhyB,KAAK2lG,sBAAsB7wE,KAAK90B,MAC1C,GAAhCA,KAAK0jG,yBAAgC1jG,KAAK28C,iBAAiBC,KAC7D58C,KAAKklG,gBAA8B,aAAElzE,QAAUhyB,KAAK4lG,UAAU9wE,KAAK90B,MAE5B,GAAhCA,KAAK6jG,yBAAgE,GAAhC7jG,KAAK0jG,0BACjD1jG,KAAKklG,gBAA8B,aAAElzE,QAAUhyB,KAAK6lG,uBAAuB/wE,KAAK90B,OAElD,GAA5BA,KAAK+jG,sBACP/jG,KAAKklG,gBAA4B,WAAElzE,QAAUhyB,KAAKmqD,gBAAgBr1B,KAAK90B,OAEzEA,KAAK4tE,SAAS57C,QAAUhyB,KAAKqlG,gBAAgBvwE,KAAK90B,KAElD;GAAImU,GAAKnU,IACTA,MAAKulG,cAAgBpxF,EAAGgzC,sBACxBnnD,KAAKuT,GAAG,SAAUvT,KAAKulG,mBAEpB,CACH,KAAOvlG,KAAK2tE,YAAYjqD,iBACtB1jB,KAAK2tE,YAAYx8D,YAAYnR,KAAK2tE,YAAYhqD,WAGhD3jB,MAAKklG,gBAA8B,aAAI3zF,SAASM,cAAc,QAC9D7R,KAAKklG,gBAA8B,aAAEr9F,UAAY,uCACjD7H,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,QACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,4BACtD7H,KAAKklG,gBAAmC,kBAAEjhF,UAAYogB,EAAa,KACnErkC,KAAKklG,gBAA8B,aAAEzzF,YAAYzR,KAAKklG,gBAAmC,mBAEzFllG,KAAK2tE,YAAYl8D,YAAYzR,KAAKklG,gBAA8B,cAEhEllG,KAAKklG,gBAA8B,aAAElzE,QAAUhyB,KAAKqlG,gBAAgBvwE,KAAK90B,QAW7EJ,EAAQ8lG,sBAAwB,WAE9B1lG,KAAKilG,uBACDjlG,KAAKulG,eACPvlG,KAAK0T,IAAI,SAAU1T,KAAKulG,cAG1B,IAAIlhE,GAASrkC,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,OAEnDrkC,MAAKklG,mBACLllG,KAAKklG,gBAA0B,SAAI3zF,SAASM,cAAc,QAC1D7R,KAAKklG,gBAA0B,SAAEr9F,UAAY,8BAC7C7H,KAAKklG,gBAA+B,cAAI3zF,SAASM,cAAc,QAC/D7R,KAAKklG,gBAA+B,cAAEr9F,UAAY,4BAClD7H,KAAKklG,gBAA+B,cAAEjhF,UAAYogB,EAAa,KAC/DrkC,KAAKklG,gBAA0B,SAAEzzF,YAAYzR,KAAKklG,gBAA+B,eAEjFllG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAAiC,gBAAI3zF,SAASM,cAAc,QACjE7R,KAAKklG,gBAAiC,gBAAEr9F,UAAY,8BACpD7H,KAAKklG,gBAAsC,qBAAI3zF,SAASM,cAAc,QACtE7R,KAAKklG,gBAAsC,qBAAEr9F,UAAY,4BACzD7H,KAAKklG,gBAAsC,qBAAEjhF,UAAYogB,EAAuB,eAChFrkC,KAAKklG,gBAAiC,gBAAEzzF,YAAYzR,KAAKklG,gBAAsC,sBAE/FllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA0B,UAChEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAiC,iBAGvEllG,KAAKklG,gBAA0B,SAAElzE,QAAUhyB,KAAKmnD,sBAAsBryB,KAAK90B,KAG3E,IAAImU,GAAKnU,IACTA,MAAKulG,cAAgBpxF,EAAG2xF,SACxB9lG,KAAKuT,GAAG,SAAUvT,KAAKulG,gBASzB3lG,EAAQ+lG,sBAAwB,WAE9B3lG,KAAKilG,uBACLjlG,KAAKu+F,cAAa,GAClBv+F,KAAKujD,kBAAmB,EAEpBvjD,KAAKulG,eACPvlG,KAAK0T,IAAI,SAAU1T,KAAKulG,cAG1B,IAAIlhE,GAASrkC,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,OAEnDrkC,MAAKu+F,eACLv+F,KAAKytE,sBAAuB,EAC5BztE,KAAKwtE,8BAA+B,EAEpCxtE,KAAKklG,mBACLllG,KAAKklG,gBAA0B,SAAI3zF,SAASM,cAAc,QAC1D7R,KAAKklG,gBAA0B,SAAEr9F,UAAY,8BAC7C7H,KAAKklG,gBAA+B,cAAI3zF,SAASM,cAAc,QAC/D7R,KAAKklG,gBAA+B,cAAEr9F,UAAY,4BAClD7H,KAAKklG,gBAA+B,cAAEjhF,UAAYogB,EAAa,KAC/DrkC,KAAKklG,gBAA0B,SAAEzzF,YAAYzR,KAAKklG,gBAA+B,eAEjFllG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAAiC,gBAAI3zF,SAASM,cAAc,QACjE7R,KAAKklG,gBAAiC,gBAAEr9F,UAAY,8BACpD7H,KAAKklG,gBAAsC,qBAAI3zF,SAASM,cAAc,QACtE7R,KAAKklG,gBAAsC,qBAAEr9F,UAAY,4BACzD7H,KAAKklG,gBAAsC,qBAAEjhF,UAAYogB,EAAwB,gBACjFrkC,KAAKklG,gBAAiC,gBAAEzzF,YAAYzR,KAAKklG,gBAAsC,sBAE/FllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA0B,UAChEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAiC,iBAGvEllG,KAAKklG,gBAA0B,SAAElzE,QAAUhyB,KAAKmnD,sBAAsBryB,KAAK90B,KAG3E,IAAImU,GAAKnU,IACTA,MAAKulG,cAAgBpxF,EAAG4xF,eACxB/lG,KAAKuT,GAAG,SAAUvT,KAAKulG,eAGvBvlG,KAAKwjD,gBAA8B,aAAIxjD,KAAK0qD,aAC5C1qD,KAAKwjD,gBAA8C,6BAAIxjD,KAAKwkG,6BAC5DxkG,KAAKwjD,gBAAkC,iBAAIxjD,KAAK2qD,iBAChD3qD,KAAK
wjD,gBAAgC,eAAIxjD,KAAK2rD,eAC9C3rD,KAAK0qD,aAAe1qD,KAAK+lG,eACzB/lG,KAAKwkG,6BAA+B,aACpCxkG,KAAK2qD,iBAAmB,aACxB3qD,KAAK2rD,eAAiB3rD,KAAKgmG,eAG3BhmG,KAAK6iD,WAQPjjD,EAAQimG,uBAAyB,WAE/B7lG,KAAKilG,uBACLjlG,KAAK6hD,oBAAqB,EAEtB7hD,KAAKulG,eACPvlG,KAAK0T,IAAI,SAAU1T,KAAKulG,eAG1BvlG,KAAKwlG,gBAAkBxlG,KAAK4jG,mBAC5B5jG,KAAKwlG,gBAAgBhsC,qBAErB,IAAIn1B,GAASrkC,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,OAEnDrkC,MAAKklG,mBACLllG,KAAKklG,gBAA0B,SAAI3zF,SAASM,cAAc,QAC1D7R,KAAKklG,gBAA0B,SAAEr9F,UAAY,8BAC7C7H,KAAKklG,gBAA+B,cAAI3zF,SAASM,cAAc,QAC/D7R,KAAKklG,gBAA+B,cAAEr9F,UAAY,4BAClD7H,KAAKklG,gBAA+B,cAAEjhF,UAAYogB,EAAa,KAC/DrkC,KAAKklG,gBAA0B,SAAEzzF,YAAYzR,KAAKklG,gBAA+B,eAEjFllG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAAiC,gBAAI3zF,SAASM,cAAc,QACjE7R,KAAKklG,gBAAiC,gBAAEr9F,UAAY,8BACpD7H,KAAKklG,gBAAsC,qBAAI3zF,SAASM,cAAc,QACtE7R,KAAKklG,gBAAsC,qBAAEr9F,UAAY,4BACzD7H,KAAKklG,gBAAsC,qBAAEjhF,UAAYogB,EAA4B,oBACrFrkC,KAAKklG,gBAAiC,gBAAEzzF,YAAYzR,KAAKklG,gBAAsC,sBAE/FllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA0B,UAChEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAiC,iBAGvEllG,KAAKklG,gBAA0B,SAAElzE,QAAUhyB,KAAKmnD,sBAAsBryB,KAAK90B,MAG3EA,KAAKwjD,gBAA8B,aAASxjD,KAAK0qD,aACjD1qD,KAAKwjD,gBAA8C,6BAAKxjD,KAAKwkG,6BAC7DxkG,KAAKwjD,gBAA4B,WAAWxjD,KAAK4rD,WACjD5rD,KAAKwjD,gBAAkC,iBAAKxjD,KAAK2qD,iBACjD3qD,KAAKwjD,gBAA+B,cAAQxjD,KAAKqrD,cACjDrrD,KAAK0qD,aAAmB1qD,KAAKimG,mBAC7BjmG,KAAK4rD,WAAmB,aACxB5rD,KAAKqrD,cAAmBrrD,KAAKkmG,iBAC7BlmG,KAAK2qD,iBAAmB,aACxB3qD,KAAKwkG,6BAA+BxkG,KAAKmmG,oBAGzCnmG,KAAK6iD,WAUPjjD,EAAQqmG,mBAAqB,SAASpmE,GACpC7/B,KAAKwlG,gBAAgBhxC,aAAaprC,KAAK8nB,WACvClxC,KAAKwlG,gBAAgBhxC,aAAanrC,GAAG6nB,WACrClxC,KAAKylG,oBAAsBzlG,KAAKwlG,gBAAgB9rC,wBAAwB15D,KAAKurD,qBAAqB1rB,EAAQ9tB,GAAG/R,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAC9G,OAA7BhS,KAAKylG,sBACPzlG,KAAKylG,oBAAoBt0D,SACzBnxC,KAAKujD,kBAAmB,GAE1BvjD,KAAK6iD,WAUPjjD,EAAQsmG,iBAAmB,SAAS58F,GAClC,GAAIu2B,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,OACJ,QAA7BlsB,KAAKylG,qBAA6Dj/F,SAA7BxG,KAAKylG,sBAC5CzlG,KAAKylG,oBAAoB1zF,EAAI/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GAC/D/R,KAAKylG,oBAAoBzzF,EAAIhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAEjEhS,KAAK6iD,WASPjjD,EAAQumG,oBAAsB,SAAStmE,GACrC,GAAIumE,GAAUpmG,KAAK4qD,WAAW/qB,EACd,QAAZumE,GACqD,GAAnDpmG,KAAKwlG,gBAAgBhxC,aAAaprC,KAAKiqB,WACzCrzC,KAAKwlG,gBAAgB3rC,uBACrB75D,KAAKqmG,UAAUD,EAAQ/lG,GAAIL,KAAKwlG,gBAAgBn8E,GAAGhpB,IACnDL,KAAKwlG,gBAAgBhxC,aAAaprC,KAAK8nB,YAEY,GAAjDlxC,KAAKwlG,gBAAgBhxC,aAAanrC,GAAGgqB,WACvCrzC,KAAKwlG,gBAAgB3rC,uBACrB75D,KAAKqmG,UAAUrmG,KAAKwlG,gBAAgBp8E,KAAK/oB,GAAI+lG,EAAQ/lG,IACrDL,KAAKwlG,gBAAgBhxC,aAAanrC,GAAG6nB,aAIvClxC,KAAKwlG,gBAAgB3rC,uBAEvB75D,KAAKujD,kBAAmB,EACxBvjD,KAAK6iD,WASPjjD,EAAQmmG,eAAiB,SAASlmE,GAChC,GAAoC,GAAhC7/B,KAAK0jG,wBAA8B,CACrC,GAAI59C,GAAO9lD,KAAK4qD,WAAW/qB,EAE3B,IAAY,MAARimB,EACF,GAAIA,EAAKuW,YAAc,EACrBiqC,MAAMtmG,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,QAAyB,qBAElE,CACHrkC,KAAK+qD,cAAcjF,GAAK,EACxB,IAAIygD,GAAevmG,KAAKkvD,QAAiB,QAAS,KAGlDq3C,GAAyB,WAAI,GAAIhjG,IAAMlD,GAAG,oBAAoBL,KAAK0hD,UACnE,IAAI8kD,GAAaD,EAAyB,UAC1CC,GAAWz0F,EAAI+zC,EAAK/zC,EACpBy0F,EAAWx0F,EAAI8zC,EAAK9zC,EAGpBhS,KAAK89C,MAAsB,eAAI,GAAI16C,IAAM/C,GAAG,iBAAiB+oB,KAAK08B,EAAKzlD,GAAGgpB,GAAGm9E,EAAWnmG,IAAKL,KAAMA,KAAK0hD,UACxG,IAAI+kD,GAAiBzmG,KAAK89C,MAAsB,cAChD2oD,GAAer9E,KAAO08B,EACtB2gD,EAAe94C,WAAY,EAC3B84C,EAAeh4F,QAAQqyC,cAAgBpyC,SAAS,EAC5CqyC,SAAS,EACTj6C,KAAM,aACNk6C,UAAW,IAEfylD,EAAepzD,UAAW,EAC1BozD,EAAep9E,GAAKm9E,EAEpBxmG,KAAKwjD,gBAA+B,cAAIxjD,KAAKqrD,cAC7CrrD,KAAKqrD,cAAgB,SAAS/hD,GAC5B,GAAIu2B,GAAU7/B,KA
AKuqD,YAAYjhD,EAAM4iB,QACjCu6E,EAAiBzmG,KAAK89C,MAAsB,cAChD2oD,GAAep9E,GAAGtX,EAAI/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GACxD00F,EAAep9E,GAAGrX,EAAIhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAG1DhS,KAAK8kD,QAAS,EACd9kD,KAAK4P,WAMbhQ,EAAQomG,eAAiB,SAAS18F,GAChC,GAAoC,GAAhCtJ,KAAK0jG,wBAA8B,CACrC,GAAI7jE,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,OAErClsB,MAAKqrD,cAAgBrrD,KAAKwjD,gBAA+B,oBAClDxjD,MAAKwjD,gBAA+B,aAG3C,IAAIkjD,GAAgB1mG,KAAK89C,MAAsB,eAAE6V,aAG1C3zD,MAAK89C,MAAsB,qBAC3B99C,MAAKkvD,QAAiB,QAAS,MAAc,iBAC7ClvD,MAAKkvD,QAAiB,QAAS,MAAiB,aAEvD,IAAIpJ,GAAO9lD,KAAK4qD,WAAW/qB,EACf,OAARimB,IACEA,EAAKuW,YAAc,EACrBiqC,MAAMtmG,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,QAAyB,kBAGrErkC,KAAK2mG,YAAYD,EAAc5gD,EAAKzlD,IACpCL,KAAKmnD,0BAGTnnD,KAAKu+F,iBAQT3+F,EAAQkmG,SAAW,WACjB,GAAI9lG,KAAK+jG,qBAAwC,GAAjB/jG,KAAKkoD,SAAkB,CACrD,GAAIg7C,GAAiBljG,KAAKijG,yBAAyBjjG,KAAKikD,iBACpD2iD,GAAevmG,GAAGM,EAAKqE,aAAa+M,EAAEmxF,EAAez7F,KAAKuK,EAAEkxF,EAAev7F,IAAI8gB,MAAM,MAAM+pC,gBAAe,EAAKC,gBAAe,EAClI,IAAIzyD,KAAK28C,iBAAiB1pC,IAAK,CAC7B,GAAwC,GAApCjT,KAAK28C,iBAAiB1pC,IAAItN,OAU5B,KAAM,IAAI/B,OAAM,sEAThB,IAAIuQ,GAAKnU,IACTA,MAAK28C,iBAAiB1pC,IAAI2zF,EAAa,SAASC,GAC9C1yF,EAAGiwC,UAAUnxC,IAAI4zF,GACjB1yF,EAAGgzC,wBACHhzC,EAAG2wC,QAAS,EACZ3wC,EAAGvE,cAWP5P,MAAKokD,UAAUnxC,IAAI2zF,GACnB5mG,KAAKmnD,wBACLnnD,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAWXhQ,EAAQ+mG,YAAc,SAASG,EAAaC,GAC1C,GAAqB,GAAjB/mG,KAAKkoD,SAAkB,CACzB,GAAI0+C,IAAex9E,KAAK09E,EAAcz9E,GAAG09E,EACzC,IAAI/mG,KAAK28C,iBAAiBG,QAAS,CACjC,GAA4C,GAAxC98C,KAAK28C,iBAAiBG,QAAQn3C,OAShC,KAAM,IAAI/B,OAAM,0EARhB,IAAIuQ,GAAKnU,IACTA,MAAK28C,iBAAiBG,QAAQ8pD,EAAa,SAASC,GAClD1yF,EAAGkwC,UAAUpxC,IAAI4zF,GACjB1yF,EAAG2wC,QAAS,EACZ3wC,EAAGvE,cAUP5P,MAAKqkD,UAAUpxC,IAAI2zF,GACnB5mG,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAUXhQ,EAAQymG,UAAY,SAASS,EAAaC,GACxC,GAAqB,GAAjB/mG,KAAKkoD,SAAkB,CACzB,GAAI0+C,IAAevmG,GAAIL,KAAKwlG,gBAAgBnlG,GAAI+oB,KAAK09E,EAAcz9E,GAAG09E,EACtE,IAAI/mG,KAAK28C,iBAAiBE,SAAU,CAClC,GAA6C,GAAzC78C,KAAK28C,iBAAiBE,SAASl3C,OASjC,KAAM,IAAI/B,OAAM,wEARhB,IAAIuQ,GAAKnU,IACTA,MAAK28C,iBAAiBE,SAAS+pD,EAAa,SAASC,GACnD1yF,EAAGkwC,UAAUxvC,OAAOgyF,GACpB1yF,EAAG2wC,QAAS,EACZ3wC,EAAGvE,cAUP5P,MAAKqkD,UAAUxvC,OAAO+xF,GACtB5mG,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAUXhQ,EAAQgmG,UAAY,WAClB,IAAI5lG,KAAK28C,iBAAiBC,MAAyB,GAAjB58C,KAAKkoD,SA4BrC,KAAM,IAAItkD,OAAM,iDA3BhB,IAAIkiD,GAAO9lD,KAAK2jG,mBACZjxF,GAAQrS,GAAGylD,EAAKzlD,GAClBooB,MAAOq9B,EAAKr9B,MACZxW,MAAO6zC,EAAKr3C,QAAQwD,MACpBmrC,MAAO0I,EAAKr3C,QAAQ2uC,MACpBjyC,OACEgB,WAAW25C,EAAKr3C,QAAQtD,MAAMgB,WAC9BC,OAAO05C,EAAKr3C,QAAQtD,MAAMiB,OAC1BC,WACEF,WAAW25C,EAAKr3C,QAAQtD,MAAMkB,UAAUF,WACxCC,OAAO05C,EAAKr3C,QAAQtD,MAAMkB,UAAUD,SAG1C,IAAyC,GAArCpM,KAAK28C,iBAAiBC,KAAKj3C,OAU7B,KAAM,IAAI/B,OAAM,wEAThB,IAAIuQ,GAAKnU,IACTA,MAAK28C,iBAAiBC,KAAKlqC,EAAM,SAAUm0F,GACzC1yF,EAAGiwC,UAAUvvC,OAAOgyF,GACpB1yF,EAAGgzC,wBACHhzC,EAAG2wC,QAAS,EACZ3wC,EAAGvE,WAoBXhQ,EAAQuqD,gBAAkB,WACxB,IAAKnqD,KAAK+jG,qBAAwC,GAAjB/jG,KAAKkoD,SACpC,GAAKloD,KAAKgkG,sBA4BRsC,MAAMtmG,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,QAA4B,wBA5BzC,CAC/B,GAAI2iE,GAAgBhnG,KAAK0kG,mBACrBuC,EAAgBjnG,KAAK4kG,kBACzB,IAAI5kG,KAAK28C,iBAAiBI,IAAK,CAC7B,GAAI5oC,GAAKnU,KACL0S,GAAQsqC,MAAOgqD,EAAelpD,MAAOmpD,EACzC,IAAwC,GAApCjnG,KAAK28C,iBAAiBI,IAAIp3C,OAU5B,KAAM,IAAI/B,OAAM,0EAThB5D,MAAK28C,iBAAiBI,IAAIrqC,EAAM,SAAUm0F,GACxC1yF,EAAGkwC,UAAUhuC,OAAOwwF,EAAc/oD,OAClC3pC,EAAGiwC,UAAU/tC,OAAOwwF,EAAc7pD,OAClC7oC,EAAGoqF,eACHpqF,EAAG2wC,QAAS,EACZ3wC,EAAGvE,cAQP5P,MAAKqkD,UAAUhuC,OAAO4wF,GACtBjnG,KAAKokD,UAAU/tC,OAAO2wF,GACtBhnG,KAAKu+F,eACLv+F,KAAK8kD,QAAS,EACd9kD,KAAK4P,WAYT,SAAS/P,EAAQD,EAASM,GAE9B,GACIklD,IADOllD,EAAoB,GACdA,EAAoB,KACjC6D,E
AAS7D,EAAoB,GAEjCN,GAAQiuE,iBAAmB,WAEzB,GAA8C,GAA1C7tE,KAAK8hD,kBAAkBC,SAASp8C,OAAa,CAC/C,IAAK,GAAIH,GAAI,EAAGA,EAAIxF,KAAK8hD,kBAAkBC,SAASp8C,OAAQH,IAC1DxF,KAAK8hD,kBAAkBC,SAASv8C,GAAG8N,SAErCtT,MAAK8hD,kBAAkBC,YAGzB/hD,KAAKykG,2BAA6B,aAG9BzkG,KAAKknG,gBAAkBlnG,KAAKknG,eAAwB,SAAKlnG,KAAKknG,eAAwB,QAAEr9F,YAC1F7J,KAAKknG,eAAwB,QAAEr9F,WAAWsH,YAAYnR,KAAKknG,eAAwB,UAYvFtnG,EAAQkuE,wBAA0B,WAChC9tE,KAAK6tE,mBAEL7tE,KAAKknG,iBACL,IAAIA,IAAkB,KAAK,OAAO,OAAO,QAAQ,SAAS,UAAU,eAChEC,GAAwB,UAAU,YAAY,YAAY,aAAa,UAAU,WAAW,cAEhGnnG,MAAKknG,eAAwB,QAAI31F,SAASM,cAAc,OACxD7R,KAAKsf,MAAM7N,YAAYzR,KAAKknG,eAAwB,QAEpD,KAAK,GAAI1hG,GAAI,EAAGA,EAAI0hG,EAAevhG,OAAQH,IAAK,CAC9CxF,KAAKknG,eAAeA,EAAe1hG,IAAM+L,SAASM,cAAc,OAChE7R,KAAKknG,eAAeA,EAAe1hG,IAAIqC,UAAY,sBAAwBq/F,EAAe1hG,GAC1FxF,KAAKknG,eAAwB,QAAEz1F,YAAYzR,KAAKknG,eAAeA,EAAe1hG,IAE9E,IAAI1B,GAAS,GAAIC,GAAO/D,KAAKknG,eAAeA,EAAe1hG,KAAMwoE,iBAAiB,GAClF5oB,GAAWkE,QAAQxlD,EAAQ9D,KAAKmnG,EAAqB3hG,IAAIsvB,KAAK90B,OAC9DolD,EAAW6hB,UAAUnjE,EAAQ9D,KAAK+rD,WAAWj3B,KAAK90B,OAElDA,KAAK8hD,kBAAkBE,KAAKh6C,KAAKlE,GAGnC9D,KAAKykG,2BAA6BzkG,KAAKonG,cAEvCpnG,KAAK8hD,kBAAkBC,SAAW/hD,KAAK8hD,kBAAkBE,MAS3DpiD,EAAQynG,YAAc,SAAS/9F,GAC7BtJ,KAAKilD,YAAYn1C,SAAS,MAC1BxG,EAAMq8B,mBAQR/lC,EAAQwnG,cAAgB,WACtBpnG,KAAK8pD,eACL9pD,KAAK2pD,eACL3pD,KAAKiqD,aAYPrqD,EAAQ8pD,QAAU,SAASpgD,GACzBtJ,KAAK+iD,WAAa/iD,KAAK0hD,UAAUrB,SAASC,MAAMtuC,EAChDhS,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQgqD,UAAY,SAAStgD,GAC3BtJ,KAAK+iD,YAAc/iD,KAAK0hD,UAAUrB,SAASC,MAAMtuC,EACjDhS,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQiqD,UAAY,SAASvgD,GAC3BtJ,KAAK8iD,WAAa9iD,KAAK0hD,UAAUrB,SAASC,MAAMvuC,EAChD/R,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQmqD,WAAa,SAASzgD,GAC5BtJ,KAAK8iD,YAAc9iD,KAAK0hD,UAAUrB,SAASC,MAAMtuC,EACjDhS,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQoqD,QAAU,SAAS1gD,GACzBtJ,KAAKgjD,cAAgBhjD,KAAK0hD,UAAUrB,SAASC,MAAMpgB,KACnDlgC,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQsqD,SAAW,SAAS5gD,GAC1BtJ,KAAKgjD,eAAiBhjD,KAAK0hD,UAAUrB,SAASC,MAAMpgB,KACpDlgC,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQqqD,UAAY,SAAS3gD,GAC3BtJ,KAAKgjD,cAAgB,EACrB15C,GAASA,EAAMD,kBAQjBzJ,EAAQ+pD,aAAe,SAASrgD,GAC9BtJ,KAAK+iD,WAAa,EAClBz5C,GAASA,EAAMD,kBAQjBzJ,EAAQkqD,aAAe,SAASxgD,GAC9BtJ,KAAK8iD,WAAa,EAClBx5C,GAASA,EAAMD,mBAMb,SAASxJ,EAAQD,GAErBA,EAAQgoD,aAAe,WACrB,IAAK,GAAIzB,KAAUnmD,MAAKg9C,MACtB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIL,GAAO9lD,KAAKg9C,MAAMmJ,EACO,IAAzBL,EAAKuV,mBACPvV,EAAKlI,MAAQ,GACbkI,EAAKwV,qBAAsB,KAYnC17D,EAAQolD,yBAA2B,WACjC,GAAiD,GAA7ChlD,KAAK0hD,UAAUjB,mBAAmB/xC,SAAmB1O,KAAK8jD,YAAYn+C,OAAS,EAAG,CAEpF,GACImgD,GAAMK,EADNmhD,EAAU,EAEVC,GAAe,EACfC,GAAiB,CAErB,KAAKrhD,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GACA,IAAdL,EAAKlI,MACP2pD,GAAe,EAGfC,GAAiB,EAEfF,EAAUxhD,EAAKhI,MAAMn4C,SACvB2hG,EAAUxhD,EAAKhI,MAAMn4C,QAM3B,IAAsB,GAAlB6hG,GAA0C,GAAhBD,EAC5B,KAAM,IAAI3jG,OAAM,wHAQhB5D,MAAKynG,mBAGiB,GAAlBD,IAC8C,WAA5CxnG,KAAK0hD,UAAUjB,mBAAmBG,OACpC5gD,KAAK0nG,iBAAiBJ,GAGtBtnG,KAAK2nG,0BAAyB,GAKlC,IAAIC,GAAe5nG,KAAK6nG,kBAGxB7nG,MAAK8nG,uBAAuBF,GAG5B5nG,KAAK4P,UAYXhQ,EAAQkoG,uBAAyB,SAASF,GACxC,GAAIzhD,GAAQL,CAGZ,KAAK,GAAIlI,KAASgqD,GAChB,GAAIA,EAAa9hG,eAAe83C,GAE9B,IAAKuI,IAAUyhD,GAAahqD,GAAOZ,MAC7B4qD,EAAahqD,GAAOZ,MAAMl3C,eAAeqgD,KAC3CL,EAAO8hD,EAAahqD,GAAOZ,MAAMmJ,GACkB,MAA/CnmD,KAAK0hD,UAAUjB,mBAAmBvlB,WAAoE,MAA/Cl7B,KAAK0hD,UAAUjB,mBAAmBvlB,UACvF4qB,EAAKqF,SACPrF,EAAK/zC,EAAI61F,EAAahqD,GAAOmqD,OAC7BjiD,EAAKqF,QAAS,EAEdy8C,EAAahqD,GAAOmqD,QAAUH,EAAahqD,GAAO+C,aAIhDmF,EAAKsF,SACPtF,EAAK9zC,EAAI41F,EAAahqD,GAAOmqD,OAC7BjiD,EAAKsF,QAAS,EAEdw8C,EAAahqD,GAAOmqD,QAAUH,EAAahqD,GAAO+C,aAGtD3gD,KAAKgoG,kBAAkBliD,EAAKhI,MAAMgI,EAAKzlD,GA
AGunG,EAAa9hD,EAAKlI,OAOpE59C,MAAK6nD,cAUPjoD,EAAQioG,iBAAmB,WACzB,GACI1hD,GAAQL,EAAMlI,EADdgqD,IAKJ,KAAKzhD,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GAClBL,EAAKqF,QAAS,EACdrF,EAAKsF,QAAS,EACqC,MAA/CprD,KAAK0hD,UAAUjB,mBAAmBvlB,WAAoE,MAA/Cl7B,KAAK0hD,UAAUjB,mBAAmBvlB,UAC3F4qB,EAAK9zC,EAAIhS,KAAK0hD,UAAUjB,mBAAmBC,gBAAgBoF,EAAKlI,MAGhEkI,EAAK/zC,EAAI/R,KAAK0hD,UAAUjB,mBAAmBC,gBAAgBoF,EAAKlI,MAEjCp3C,SAA7BohG,EAAa9hD,EAAKlI,SACpBgqD,EAAa9hD,EAAKlI,QAAUquB,OAAQ,EAAGjvB,SAAW+qD,OAAO,EAAGpnD,YAAY,IAE1EinD,EAAa9hD,EAAKlI,OAAOquB,QAAU,EACnC27B,EAAa9hD,EAAKlI,OAAOZ,MAAMmJ,GAAUL,EAK7C,IAAImiD,GAAW,CACf,KAAKrqD,IAASgqD,GACRA,EAAa9hG,eAAe83C,IAC1BqqD,EAAWL,EAAahqD,GAAOquB,SACjCg8B,EAAWL,EAAahqD,GAAOquB,OAMrC,KAAKruB,IAASgqD,GACRA,EAAa9hG,eAAe83C,KAC9BgqD,EAAahqD,GAAO+C,aAAesnD,EAAW,GAAKjoG,KAAK0hD,UAAUjB,mBAAmBE,YACrFinD,EAAahqD,GAAO+C,aAAgBinD,EAAahqD,GAAOquB,OAAS,EACjE27B,EAAahqD,GAAOmqD,OAASH,EAAahqD,GAAO+C,YAAe,IAAOinD,EAAahqD,GAAOquB,OAAS,GAAK27B,EAAahqD,GAAO+C,YAIjI,OAAOinD,IAUThoG,EAAQ8nG,iBAAmB,SAASJ,GAClC,GAAInhD,GAAQL,CAGZ,KAAKK,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GACdL,EAAKhI,MAAMn4C,QAAU2hG,IACvBxhD,EAAKlI,MAAQ,GAMnB,KAAKuI,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GACA,GAAdL,EAAKlI,OACP59C,KAAKkoG,UAAU,EAAEpiD,EAAKhI,MAAMgI,EAAKzlD,MAczCT,EAAQ+nG,yBAA2B,WACjC,GAAIxhD,GAAQL,EAAMqiD,EACd5H,EAAW,GAGf4H,GAAYnoG,KAAKg9C,MAAMh9C,KAAK8jD,YAAY,IACxCqkD,EAAUvqD,MAAQ2iD,EAClBvgG,KAAKooG,kBAAkB7H,EAAS4H,EAAUrqD,MAAMqqD,EAAU9nG,GAG1D,KAAK8lD,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GAClBo6C,EAAWz6C,EAAKlI,MAAQ2iD,EAAWz6C,EAAKlI,MAAQ2iD,EAKpD,KAAKp6C,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GAClBL,EAAKlI,OAAS2iD,IAepB3gG,EAAQ6nG,iBAAmB,WACzBznG,KAAK0hD,UAAUvC,WAAWzwC,SAAU,EACpC1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAU,EAC3C1O,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SAAU,EACvD1O,KAAKmtE,2BACsC,GAAvCntE,KAAK0hD,UAAUZ,aAAapyC,UAC9B1O,KAAK0hD,UAAUZ,aAAaC,SAAU,GAExC/gD,KAAK0oD,wBAEL,IAAI8oB,GAASxxE,KAAK0hD,UAAUjB,kBAC5B+wB,GAAO9wB,gBAAkBx7C,KAAK2lB,IAAI2mD,EAAO9wB,kBACjB,MAApB8wB,EAAOt2C,WAAyC,MAApBs2C,EAAOt2C,aACrCs2C,EAAO9wB,iBAAmB,IAGJ,MAApB8wB,EAAOt2C,WAAyC,MAApBs2C,EAAOt2C,UACM,GAAvCl7B,KAAK0hD,UAAUZ,aAAapyC,UAC9B1O,KAAK0hD,UAAUZ,aAAah6C,KAAO,YAIM,GAAvC9G,KAAK0hD,UAAUZ,aAAapyC,UAC9B1O,KAAK0hD,UAAUZ,aAAah6C,KAAO,eAgBzClH,EAAQooG,kBAAoB,SAASlqD,EAAOuqD,EAAUT,EAAcU,GAClE,IAAK,GAAI9iG,GAAI,EAAGA,EAAIs4C,EAAMn4C,OAAQH,IAAK,CACrC,GAAI64F,GAAY,IAEdA,GADEvgD,EAAMt4C,GAAGouD,MAAQy0C,EACPvqD,EAAMt4C,GAAG4jB,KAGT00B,EAAMt4C,GAAG6jB,EAIvB,IAAIk/E,IAAY,CACmC,OAA/CvoG,KAAK0hD,UAAUjB,mBAAmBvlB,WAAoE,MAA/Cl7B,KAAK0hD,UAAUjB,mBAAmBvlB,UACvFmjE,EAAUlzC,QAAUkzC,EAAUzgD,MAAQ0qD,IACxCjK,EAAUlzC,QAAS,EACnBkzC,EAAUtsF,EAAI61F,EAAavJ,EAAUzgD,OAAOmqD,OAC5CQ,GAAY,GAIVlK,EAAUjzC,QAAUizC,EAAUzgD,MAAQ0qD,IACxCjK,EAAUjzC,QAAS,EACnBizC,EAAUrsF,EAAI41F,EAAavJ,EAAUzgD,OAAOmqD,OAC5CQ,GAAY,GAIC,GAAbA,IACFX,EAAavJ,EAAUzgD,OAAOmqD,QAAUH,EAAavJ,EAAUzgD,OAAO+C,YAClE09C,EAAUvgD,MAAMn4C,OAAS,GAC3B3F,KAAKgoG,kBAAkB3J,EAAUvgD,MAAMugD,EAAUh+F,GAAGunG,EAAavJ,EAAUzgD,UAenFh+C,EAAQsoG,UAAY,SAAStqD,EAAOE,EAAOuqD,GACzC,IAAK,GAAI7iG,GAAI,EAAGA,EAAIs4C,EAAMn4C,OAAQH,IAAK,CACrC,GAAI64F,GAAY,IAEdA,GADEvgD,EAAMt4C,GAAGouD,MAAQy0C,EACPvqD,EAAMt4C,GAAG4jB,KAGT00B,EAAMt4C,GAAG6jB,IAEA,IAAnBg1E,EAAUzgD,OAAeygD,EAAUzgD,MAAQA,KAC7CygD,EAAUzgD,MAAQA,EACdygD,EAAUvgD,MAAMn4C,OAAS,GAC3B3F,KAAKkoG,UAAUtqD,EAAM,EAAGygD,EAAUvgD,MAAOugD,EAAUh+F,OAe3DT,EAAQwoG,kBAAoB,SAASxqD,EAAOE,EAAOuqD,GACjDroG
,KAAKg9C,MAAMqrD,GAAU/sC,qBAAsB,CAE3C,KAAK,GADD+iC,GAAWnjE,EACN11B,EAAI,EAAGA,EAAIs4C,EAAMn4C,OAAQH,IAChC01B,EAAY,EACR4iB,EAAMt4C,GAAGouD,MAAQy0C,GACnBhK,EAAYvgD,EAAMt4C,GAAG4jB,KACrB8R,EAAY,IAGZmjE,EAAYvgD,EAAMt4C,GAAG6jB,GAEA,IAAnBg1E,EAAUzgD,QACZygD,EAAUzgD,MAAQA,EAAQ1iB,EAI9B,KAAK,GAAI11B,GAAI,EAAGA,EAAIs4C,EAAMn4C,OAAQH,IACA64F,EAA5BvgD,EAAMt4C,GAAGouD,MAAQy0C,EAAuBvqD,EAAMt4C,GAAG4jB,KACnC00B,EAAMt4C,GAAG6jB,GAEvBg1E,EAAUvgD,MAAMn4C,OAAS,GAAK04F,EAAU/iC,uBAAwB,GAClEt7D,KAAKooG,kBAAkB/J,EAAUzgD,MAAOygD,EAAUvgD,MAAOugD,EAAUh+F,KAWzET,EAAQ4oG,cAAgB,WACtB,IAAK,GAAIriD,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BnmD,KAAKg9C,MAAMmJ,GAAQgF,QAAS,EAC5BnrD,KAAKg9C,MAAMmJ,GAAQiF,QAAS,KAQ9B,SAASvrD,EAAQD,EAASM,GAkgB9B,QAASuoG,KACPzoG,KAAK0hD,UAAUZ,aAAapyC,SAAW1O,KAAK0hD,UAAUZ,aAAapyC,OACnE,IAAIg6F,GAAqBn3F,SAASo3F,eAAe,qBACCD,GAAmBz7F,MAAMd,WAAhC,GAAvCnM,KAAK0hD,UAAUZ,aAAapyC,QAAwD,UACR,UAEhF1O,KAAK0oD,wBAAuB,GAO9B,QAASkgD,KACP,IAAK,GAAIziD,KAAUnmD,MAAK4jD,iBAClB5jD,KAAK4jD,iBAAiB99C,eAAeqgD,KACvCnmD,KAAK4jD,iBAAiBuC,GAAQsV,GAAK,EAAIz7D,KAAK4jD,iBAAiBuC,GAAQuV,GAAK,EAC1E17D,KAAK4jD,iBAAiBuC,GAAQoV,GAAK,EAAIv7D,KAAK4jD,iBAAiBuC,GAAQqV,GAAK,EAG7B,IAA7Cx7D,KAAK0hD,UAAUjB,mBAAmB/xC,SACpC1O,KAAKglD,2BACL6jD,EAAiBtoG,KAAKP,KAAM,aAAc,EAAG,8CAC7C6oG,EAAiBtoG,KAAKP,KAAM,aAAc,EAAG,0BAC7C6oG,EAAiBtoG,KAAKP,KAAM,aAAc,EAAG,0BAC7C6oG,EAAiBtoG,KAAKP,KAAM,aAAc,EAAG,wBAC7C6oG,EAAiBtoG,KAAKP,KAAM,eAAgB,EAAG,oBAG/CA,KAAK08F,kBAEP18F,KAAK8kD,QAAS,EACd9kD,KAAK4P,QAMP,QAASk5F,KACP,GAAIr6F,GAAU,gDACVs6F,KACAC,EAAez3F,SAASo3F,eAAe,wBACvCM,EAAe13F,SAASo3F,eAAe,uBAC3C,IAA4B,GAAxBK,EAAaE,QAAiB,CAMhC,GALIlpG,KAAK0hD,UAAUlD,QAAQC,UAAUE,uBAAyB3+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUE,uBAAwBoqD,EAAgB/gG,KAAK,0BAA4BhI,KAAK0hD,UAAUlD,QAAQC,UAAUE,uBAC3M3+C,KAAK0hD,UAAUlD,QAAQI,gBAAkB5+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUG,gBAAyCmqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQI,gBAC1L5+C,KAAK0hD,UAAUlD,QAAQK,cAAgB7+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUI,cAA2CkqD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQK,cACxL7+C,KAAK0hD,UAAUlD,QAAQM,gBAAkB9+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUK,gBAAyCiqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQM,gBAC1L9+C,KAAK0hD,UAAUlD,QAAQO,SAAW/+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUM,SAAgDgqD,EAAgB/gG,KAAK,YAAchI,KAAK0hD,UAAUlD,QAAQO,SACzJ,GAA1BgqD,EAAgBpjG,OAAa,CAC/B8I,EAAU,kBACVA,GAAW,wBACX,KAAK,GAAIjJ,GAAI,EAAGA,EAAIujG,EAAgBpjG,OAAQH,IAC1CiJ,GAAWs6F,EAAgBvjG,GACvBA,EAAIujG,EAAgBpjG,OAAS,IAC/B8I,GAAW,KAGfA,IAAW,KAETzO,KAAK0hD,UAAUZ,aAAapyC,SAAW1O,KAAKmpG,gBAAgBroD,aAAapyC,UAC7C,GAA1Bq6F,EAAgBpjG,OAAc8I,EAAU,kBACtCA,GAAW,KACjBA,GAAW,iBAAmBzO,KAAK0hD,UAAUZ,aAAapyC,SAE7C,iDAAXD,IACFA,GAAW,UAGV,IAA4B,GAAxBw6F,EAAaC,QAAiB,CAQrC,GAPAz6F,EAAU,kBACVA,GAAW,wCACPzO,KAAK0hD,UAAUlD,QAAQQ,UAAUC,cAAgBj/C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUC,cAAgB8pD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQQ,UAAUC,cACjLj/C,KAAK0hD,UAAUlD,QAAQI,gBAAkB5+C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUJ,gBAAwBmqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQI,gBACzK5+C,KAAK0hD,UAAUlD,QAAQK,cAAgB7+C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUH,cAA0BkqD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQK,cACvK7+C,KAAK0hD,UAAUlD,QAAQM,gBAAkB9+C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUF,gBAAwBiqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQM,gBACzK9+C,KAAK0hD,UAAUlD,QAAQO,SAAW/+C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUD,SAA+BgqD,EAAgB/gG,KAAK,YAAchI,KAAK0hD,UAAUlD,QAAQO,SACxI,GAA1BgqD,EAAgBpjG,OAAa,CAC/B8I,GAAW,gBACX,KAAK,GAAIjJ,GAAI,EAAGA,EAAIujG,EAAgBpjG,OAAQH,IAC1CiJ,GAAWs6F,EAAgBvjG,GACvBA,EAAIujG,EAAgBpjG,OAAS,IAC/B8I,GAAW,KAGfA,IAAW,KAEiB,GAA1Bs6F,EAAgBpjG,SAAc8I,GAAW,KACzCzO,KAAK0hD,UAAUZ,cAAgB9gD,KAAKmpG,gBAAgBroD,eACtDryC,GAAW,mBAA
qBzO,KAAK0hD,UAAUZ,cAEjDryC,GAAW,SAER,CAOH,GANAA,EAAU,kBACNzO,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,cAAgBj/C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBD,cAAgB8pD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,cACrNj/C,KAAK0hD,UAAUlD,QAAQI,gBAAkB5+C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBN,gBAAwBmqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQI,gBACrL5+C,KAAK0hD,UAAUlD,QAAQK,cAAgB7+C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBL,cAA0BkqD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQK,cACnL7+C,KAAK0hD,UAAUlD,QAAQM,gBAAkB9+C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBJ,gBAAwBiqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQM,gBACrL9+C,KAAK0hD,UAAUlD,QAAQO,SAAW/+C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBH,SAA+BgqD,EAAgB/gG,KAAK,YAAchI,KAAK0hD,UAAUlD,QAAQO,SACpJ,GAA1BgqD,EAAgBpjG,OAAa,CAC/B8I,GAAW,oCACX,KAAK,GAAIjJ,GAAI,EAAGA,EAAIujG,EAAgBpjG,OAAQH,IAC1CiJ,GAAWs6F,EAAgBvjG,GACvBA,EAAIujG,EAAgBpjG,OAAS,IAC/B8I,GAAW,KAGfA,IAAW,MAOb,GALAA,GAAW,wBACXs6F,KACI/oG,KAAK0hD,UAAUjB,mBAAmBvlB,WAAal7B,KAAKmpG,gBAAgB1oD,mBAAmBvlB,WAAkC6tE,EAAgB/gG,KAAK,cAAgBhI,KAAK0hD,UAAUjB,mBAAmBvlB,WAChMh2B,KAAK2lB,IAAI7qB,KAAK0hD,UAAUjB,mBAAmBC,kBAAoB1gD,KAAKmpG,gBAAgB1oD,mBAAmBC,iBAAkBqoD,EAAgB/gG,KAAK,oBAAsBhI,KAAK0hD,UAAUjB,mBAAmBC,iBACtM1gD,KAAK0hD,UAAUjB,mBAAmBE,aAAe3gD,KAAKmpG,gBAAgB1oD,mBAAmBE,aAAgCooD,EAAgB/gG,KAAK,gBAAkBhI,KAAK0hD,UAAUjB,mBAAmBE,aACxK,GAA1BooD,EAAgBpjG,OAAa,CAC/B,IAAK,GAAIH,GAAI,EAAGA,EAAIujG,EAAgBpjG,OAAQH,IAC1CiJ,GAAWs6F,EAAgBvjG,GACvBA,EAAIujG,EAAgBpjG,OAAS,IAC/B8I,GAAW,KAGfA,IAAW,QAGXA,IAAW,eAEbA,IAAW,KAIbzO,KAAKopG,WAAWnlF,UAAYxV,EAO9B,QAAS46F,KACP,GAAIl0F,IAAO,iBAAkB,gBAAiB,iBAC1Cm0F,EAAc/3F,SAASg4F,cAAc,6CAA6CliG,MAClFmiG,EAAU,SAAWF,EAAc,SACnCG,EAAQl4F,SAASo3F,eAAea,EACpCC,GAAMx8F,MAAM26B,QAAU,OACtB,KAAK,GAAIpiC,GAAI,EAAGA,EAAI2P,EAAIxP,OAAQH,IAC1B2P,EAAI3P,IAAMgkG,IACZC,EAAQl4F,SAASo3F,eAAexzF,EAAI3P,IACpCikG,EAAMx8F,MAAM26B,QAAU,OAG1B5nC,MAAKwoG,gBACc,KAAfc,GACFtpG,KAAK0hD,UAAUjB,mBAAmB/xC,SAAU,EAC5C1O,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SAAU,EACvD1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAU,GAErB,KAAf46F,EAC0C,GAA7CtpG,KAAK0hD,UAAUjB,mBAAmB/xC,UACpC1O,KAAK0hD,UAAUjB,mBAAmB/xC,SAAU,EAC5C1O,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SAAU,EACvD1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAU,EAC3C1O,KAAK0hD,UAAUZ,aAAapyC,SAAU,EACtC1O,KAAKglD,6BAIPhlD,KAAK0hD,UAAUjB,mBAAmB/xC,SAAU,EAC5C1O,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SAAU,EACvD1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAU,GAE7C1O,KAAKmtE,0BACL,IAAIu7B,GAAqBn3F,SAASo3F,eAAe,qBACCD,GAAmBz7F,MAAMd,WAAhC,GAAvCnM,KAAK0hD,UAAUZ,aAAapyC,QAAwD,UACR,UAChF1O,KAAK8kD,QAAS,EACd9kD,KAAK4P,QAWP,QAASi5F,GAAkBxoG,EAAGgN,EAAIq8F,GAChC,GAAIC,GAAUtpG,EAAK,SACfupG,EAAar4F,SAASo3F,eAAetoG,GAAIgH,KAEzCpB,OAAMC,QAAQmH,IAChBkE,SAASo3F,eAAegB,GAAStiG,MAAQgG,EAAIzC,SAASg/F,IACtD5pG,KAAK6pG,yBAAyBH,EAAsBr8F,EAAIzC,SAASg/F,OAGjEr4F,SAASo3F,eAAegB,GAAStiG,MAAQuD,SAASyC,GAAOgY,WAAWukF,GACpE5pG,KAAK6pG,yBAAyBH,EAAuB9+F,SAASyC,GAAOgY,WAAWukF,MAGrD,gCAAzBF,GACuB,sCAAzBA,GACyB,kCAAzBA,IACA1pG,KAAKglD,2BAEPhlD,KAAK8kD,QAAS,EACd9kD,KAAK4P,QA7sBP,GAAIjP,GAAOT,EAAoB,GAC3B4pG,EAAiB5pG,EAAoB,IACrC6pG,EAA4B7pG,EAAoB,IAChD8pG,EAAiB9pG,EAAoB,GAOzCN,GAAQqqG,iBAAmB,WACzBjqG,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAW1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,QAC7E1O,KAAKmtE,2BACLntE,KAAK8kD,QAAS,EACd9kD,KAAK4P,SASPhQ,EAAQutE,yBAA2B,WAEe,GAA5CntE,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SACnC1O,KAAKktE,YAAY48B,GACjB9pG,KAAKktE,YAAY68B,GAEjB/pG,KAAK0hD,UAAUlD,QAAQI,eAAiB5+C,KAAK0hD,UAAUlD,QAAQC,UAAUG,eACzE5+C,KAAK0hD,UAAUlD,QAAQK,aAAe7+C,KAAK0hD,UAAUlD,QAAQC,UAAUI,aACvE7+C,KAAK0hD,UAAUlD,QAAQM,eAAiB9+C,KAAK0hD,UAAUlD,QAAQC,UAAUK,eACzE9+C,KAAK0hD,UAAUlD,QAAQO,QAAU/+C,KAAK0hD,UAAUlD,QAAQC,UAAUM,QAElE/+C,KAAK+sE,WAAWi9B,IAE+C,GAAxDh
qG,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SACpD1O,KAAKktE,YAAY88B,GACjBhqG,KAAKktE,YAAY48B,GAEjB9pG,KAAK0hD,UAAUlD,QAAQI,eAAiB5+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBN,eACrF5+C,KAAK0hD,UAAUlD,QAAQK,aAAe7+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBL,aACnF7+C,KAAK0hD,UAAUlD,QAAQM,eAAiB9+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBJ,eACrF9+C,KAAK0hD,UAAUlD,QAAQO,QAAU/+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBH,QAE9E/+C,KAAK+sE,WAAWg9B,KAGhB/pG,KAAKktE,YAAY88B,GACjBhqG,KAAKktE,YAAY68B,GACjB/pG,KAAKkqG,cAAgB1jG,OAErBxG,KAAK0hD,UAAUlD,QAAQI,eAAiB5+C,KAAK0hD,UAAUlD,QAAQQ,UAAUJ,eACzE5+C,KAAK0hD,UAAUlD,QAAQK,aAAe7+C,KAAK0hD,UAAUlD,QAAQQ,UAAUH,aACvE7+C,KAAK0hD,UAAUlD,QAAQM,eAAiB9+C,KAAK0hD,UAAUlD,QAAQQ,UAAUF,eACzE9+C,KAAK0hD,UAAUlD,QAAQO,QAAU/+C,KAAK0hD,UAAUlD,QAAQQ,UAAUD,QAElE/+C,KAAK+sE,WAAW+8B,KAUpBlqG,EAAQuqG,4BAA8B,WAEL,GAA3BnqG,KAAK8jD,YAAYn+C,OACnB3F,KAAKg9C,MAAMh9C,KAAK8jD,YAAY,IAAIsa,UAAU,EAAG,IAIzCp+D,KAAK8jD,YAAYn+C,OAAS3F,KAAK0hD,UAAUvC,WAAWE,kBAAyD,GAArCr/C,KAAK0hD,UAAUvC,WAAWzwC,SACpG1O,KAAKm8F,aAAan8F,KAAK0hD,UAAUvC,WAAWG,eAAe,GAI7Dt/C,KAAKoqG,qBAUTxqG,EAAQwqG,iBAAmB,WAKzBpqG,KAAKqqG,gCACLrqG,KAAKsqG,uBAEDtqG,KAAK0hD,UAAUlD,QAAQM,eAAiB,IACC,GAAvC9+C,KAAK0hD,UAAUZ,aAAapyC,SAA0D,GAAvC1O,KAAK0hD,UAAUZ,aAAaC,QAC7E/gD,KAAKuqG,oCAGuD,GAAxDvqG,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,QAC/C1O,KAAKwqG,qCAGLxqG,KAAKyqG,2BAeb7qG,EAAQyuD,wBAA0B,WAChC,GAA2C,GAAvCruD,KAAK0hD,UAAUZ,aAAapyC,SAA0D,GAAvC1O,KAAK0hD,UAAUZ,aAAaC,QAAiB,CAC9F/gD,KAAK4jD,oBACL5jD,KAAK6jD,yBAEL,KAAK,GAAIsC,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BnmD,KAAK4jD,iBAAiBuC,GAAUnmD,KAAKg9C,MAAMmJ,GAG/C,IAAIogD,GAAevmG,KAAKkvD,QAAiB,QAAS,KAClD,KAAK,GAAIw7C,KAAiBnE,GACpBA,EAAazgG,eAAe4kG,KAC1B1qG,KAAK89C,MAAMh4C,eAAeygG,EAAamE,GAAez4C,cACxDjyD,KAAK4jD,iBAAiB8mD,GAAiBnE,EAAamE,GAGpDnE,EAAamE,GAAetsC,UAAU,EAAG,GAK/C,KAAK,GAAIlX,KAAOlnD,MAAK4jD,iBACf5jD,KAAK4jD,iBAAiB99C,eAAeohD,IACvClnD,KAAK6jD,uBAAuB77C,KAAKk/C,OAKrClnD,MAAK4jD,iBAAmB5jD,KAAKg9C,MAC7Bh9C,KAAK6jD,uBAAyB7jD,KAAK8jD,aAUvClkD,EAAQyqG,8BAAgC,WACtC,GAAIzrF,GAAIC,EAAI8G,EAAUmgC,EAAMtgD,EACxBw3C,EAAQh9C,KAAK4jD,iBACb+mD,EAAU3qG,KAAK0hD,UAAUlD,QAAQI,eACjCgsD,EAAe,CAEnB,KAAKplG,EAAI,EAAGA,EAAIxF,KAAK6jD,uBAAuBl+C,OAAQH,IAClDsgD,EAAO9I,EAAMh9C,KAAK6jD,uBAAuBr+C,IACzCsgD,EAAK/G,QAAU/+C,KAAK0hD,UAAUlD,QAAQO,QAEhB,WAAlB/+C,KAAK88F,WAAqC,GAAX6N,GACjC/rF,GAAMknC,EAAK/zC,EACX8M,GAAMinC,EAAK9zC,EACX2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAEpC+rF,EAA4B,GAAZjlF,EAAiB,EAAKglF,EAAUhlF,EAChDmgC,EAAKyV,GAAK38C,EAAKgsF,EACf9kD,EAAK0V,GAAK38C,EAAK+rF,IAGf9kD,EAAKyV,GAAK,EACVzV,EAAK0V,GAAK,IAahB57D,EAAQ6qG,uBAAyB,WAC/B,GAAII,GAAYn9C,EAAMV,EAClBpuC,EAAIC,EAAI08C,EAAIC,EAAIsvC,EAAanlF,EAC7Bm4B,EAAQ99C,KAAK89C,KAGjB,KAAKkP,IAAUlP,GACTA,EAAMh4C,eAAeknD,KACvBU,EAAO5P,EAAMkP,GACTU,EAAKC,WAEH3tD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKkG,OAAS5zD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKiG,UACzEk3C,EAAan9C,EAAKlP,QAAQK,aAE1BgsD,IAAen9C,EAAKrkC,GAAGgzC,YAAc3O,EAAKtkC,KAAKizC,YAAc,GAAKr8D,KAAK0hD,UAAUvC,WAAWY,WAE5FnhC,EAAM8uC,EAAKtkC,KAAKrX,EAAI27C,EAAKrkC,GAAGtX,EAC5B8M,EAAM6uC,EAAKtkC,KAAKpX,EAAI07C,EAAKrkC,GAAGrX,EAC5B2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAEpB,GAAZ8G,IACFA,EAAW,KAIbmlF,EAAc9qG,KAAK0hD,UAAUlD,QAAQM,gBAAkB+rD,EAAallF,GAAYA,EAEhF41C,EAAK38C,EAAKksF,EACVtvC,EAAK38C,EAAKisF,EAEVp9C,EAAKtkC,KAAKmyC,IAAMA,EAChB7N,EAAKtkC,KAAKoyC,IAAMA,EAChB9N,EAAKrkC,GAAGkyC,IAAMA,EACd7N,EAAKrkC,GAAGmyC,IAAMA,KAexB57D,EAAQ2qG,kCAAoC,WAC1C,GAAIM,GAAYn9C,EAAMV,EAAQ+9C,EAC1BjtD,EAAQ99C,KAAK89C,KAGjB,KAAKkP,IAAUlP,GACb,GAAIA,EAAMh4C,eAAeknD,KACvBU,EAAO5P,EAAMkP,GACTU,EAAKC,WAEH3tD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKkG,OAAS5zD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKiG,SACzD,MAAZjG,EAAKuB,KAAa,CACpB,
GAAI+7C,GAAQt9C,EAAKrkC,GACb4hF,EAAQv9C,EAAKuB,IACbi8C,EAAQx9C,EAAKtkC,IAEjByhF,GAAan9C,EAAKlP,QAAQK,aAE1BksD,EAAsBC,EAAM3uC,YAAc6uC,EAAM7uC,YAAc,EAG9DwuC,GAAcE,EAAsB/qG,KAAK0hD,UAAUvC,WAAWY,WAC9D//C,KAAKmrG,sBAAsBH,EAAOC,EAAO,GAAMJ,GAC/C7qG,KAAKmrG,sBAAsBF,EAAOC,EAAO,GAAML,KAiB3DjrG,EAAQurG,sBAAwB,SAAUH,EAAOC,EAAOJ,GACtD,GAAIjsF,GAAIC,EAAI08C,EAAIC,EAAIsvC,EAAanlF,CAEjC/G,GAAMosF,EAAMj5F,EAAIk5F,EAAMl5F,EACtB8M,EAAMmsF,EAAMh5F,EAAIi5F,EAAMj5F,EACtB2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAEpB,GAAZ8G,IACFA,EAAW,KAIbmlF,EAAc9qG,KAAK0hD,UAAUlD,QAAQM,gBAAkB+rD,EAAallF,GAAYA,EAEhF41C,EAAK38C,EAAKksF,EACVtvC,EAAK38C,EAAKisF,EAEVE,EAAMzvC,IAAMA,EACZyvC,EAAMxvC,IAAMA,EACZyvC,EAAM1vC,IAAMA,EACZ0vC,EAAMzvC,IAAMA,GAId57D,EAAQwqD,6BAA+B,WACrC,GAAkC5jD,SAA9BxG,KAAKorG,qBAAoC,CAC3C,KAAOprG,KAAKorG,qBAAqB1nF,iBAC/B1jB,KAAKorG,qBAAqBj6F,YAAYnR,KAAKorG,qBAAqBznF,WAGlE3jB,MAAKorG,qBAAqBvhG,WAAWsH,YAAYnR,KAAKorG,sBACtDprG,KAAKorG,qBAAuB5kG,SAQhC5G,EAAQwtE,0BAA4B,WAClC,GAAkC5mE,SAA9BxG,KAAKorG,qBAAoC,CAC3CprG,KAAKmpG,mBACLxoG,EAAK8F,WAAWzG,KAAKmpG,gBAAgBnpG,KAAK0hD,UAE1C,IAAI2pD,IAAgC,KAAM,KAAM,KAAM,KACtDrrG,MAAKorG,qBAAuB75F,SAASM,cAAc,OACnD7R,KAAKorG,qBAAqBvjG,UAAY,uBACtC7H,KAAKorG,qBAAqBnnF,UAAY,onBAW2E,GAAKjkB,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAAyB,wGAA2G,GAAK3+C,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAAyB,4JAGpP3+C,KAAK0hD,UAAUlD,QAAQC,UAAUG,eAAiB,wFAA0F5+C,KAAK0hD,UAAUlD,QAAQC,UAAUG,eAAiB,2JAG/L5+C,KAAK0hD,UAAUlD,QAAQC,UAAUI,aAAe,sFAAwF7+C,KAAK0hD,UAAUlD,QAAQC,UAAUI,aAAe,6JAGtL7+C,KAAK0hD,UAAUlD,QAAQC,UAAUK,eAAiB,0FAA4F9+C,KAAK0hD,UAAUlD,QAAQC,UAAUK,eAAiB,sJAGvM9+C,KAAK0hD,UAAUlD,QAAQC,UAAUM,QAAU,4FAA8F/+C,KAAK0hD,UAAUlD,QAAQC,UAAUM,QAAU,sPAM/K/+C,KAAK0hD,UAAUlD,QAAQQ,UAAUC,aAAe,kGAAoGj/C,KAAK0hD,UAAUlD,QAAQQ,UAAUC,aAAe,2JAGnMj/C,KAAK0hD,UAAUlD,QAAQQ,UAAUJ,eAAiB,uFAAyF5+C,KAAK0hD,UAAUlD,QAAQQ,UAAUJ,eAAiB,0JAG9L5+C,KAAK0hD,UAAUlD,QAAQQ,UAAUH,aAAe,qFAAuF7+C,KAAK0hD,UAAUlD,QAAQQ,UAAUH,aAAe,4JAGrL7+C,KAAK0hD,UAAUlD,QAAQQ,UAAUF,eAAiB,yFAA2F9+C,KAAK0hD,UAAUlD,QAAQQ,UAAUF,eAAiB,qJAGtM9+C,KAAK0hD,UAAUlD,QAAQQ,UAAUD,QAAU,2FAA6F/+C,KAAK0hD,UAAUlD,QAAQQ,UAAUD,QAAU,oQAM9K/+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,aAAe,kGAAoGj/C,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,aAAe,2JAG3Nj/C,KAAK0hD,UAAUlD,QAAQU,sBAAsBN,eAAiB,uFAAyF5+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBN,eAAiB,0JAGtN5+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBL,aAAe,qFAAuF7+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBL,aAAe,4JAG7M7+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBJ,eAAiB,yFAA2F9+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBJ,eAAiB,qJAG9N9+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBH,QAAU,2FAA6F/+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBH,QAAU,uJAG3MssD,EAA6B1kG,QAAQ3G,KAAK0hD,UAAUjB,mBAAmBvlB,WAAa,0FAA4Fl7B,KAAK0hD,UAAUjB,mBAAmBvlB,UAAY,oKAGtNl7B,KAAK0hD,UAAUjB,mBAAmBC,gBAAkB,yFAA2F1gD,KAAK0hD,UAAUjB,mBAAmBC,gBAAkB,6JAGvM1gD,KAAK0hD,UAAUjB,mBAAmBE,YAAc,wFAA0F3gD,KAAK0hD,UAAUjB,mBAAmBE,YAAc,odAU9R3gD,KAAKyZ,iBAAiB6xF,cAAc15F,aAAa5R,KAAKorG,qBAAsBprG,KAAKyZ,kBACjFzZ,KAAKopG,WAAa73F,SAASM,cAAc,OACzC7R,KAAKopG,WAAWn8F,MAAMswC,SAAW,OACjCv9C,KAAKopG,WAAWn8F,MAAMszD,WAAa,UACnCvgE,KAAKyZ,iBAAiB6xF,cAAc15F,aAAa5R,KAAKopG,WAAYppG,KAAKyZ,iBAEvE;GAAI8xF,EACJA,GAAeh6F,SAASo3F,eAAe,eACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,cAAe,GAAI,2CACvEurG,EAAeh6F,SAASo3F,eAAe,eACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,cAAe,EAAG,0BACtEurG,EAAeh6F,SAASo3F,eAAe,eACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,cAAe,EAAG,0BACtEurG,EAAeh6F,SAASo3F,eAAe,eACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,cAAe,EAAG,wBACtEurG,EAAeh6F,SAASo3F,eAAe,iBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,gBAAiB,EAAG,mBAExEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,kCACrEurG,EAAeh
6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,0BACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,0BACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,wBACrEurG,EAAeh6F,SAASo3F,eAAe,gBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,eAAgB,EAAG,mBAEvEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,8CACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,0BACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,0BACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,wBACrEurG,EAAeh6F,SAASo3F,eAAe,gBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,eAAgB,EAAG,mBACvEurG,EAAeh6F,SAASo3F,eAAe,qBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,oBAAqBqrG,EAA8B,gCACvGE,EAAeh6F,SAASo3F,eAAe,kBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,iBAAkB,EAAG,sCACzEurG,EAAeh6F,SAASo3F,eAAe,iBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,gBAAiB,EAAG,iCAExE,IAAIgpG,GAAez3F,SAASo3F,eAAe,wBACvCM,EAAe13F,SAASo3F,eAAe,wBACvC6C,EAAej6F,SAASo3F,eAAe,uBAC3CM,GAAaC,SAAU,EACnBlpG,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,UACnCs6F,EAAaE,SAAU,GAErBlpG,KAAK0hD,UAAUjB,mBAAmB/xC,UACpC88F,EAAatC,SAAU,EAGzB,IAAIR,GAAqBn3F,SAASo3F,eAAe,sBAC7C8C,EAAwBl6F,SAASo3F,eAAe,yBAChD+C,EAAwBn6F,SAASo3F,eAAe,wBAEpDD,GAAmB12E,QAAUy2E,EAAwB3zE,KAAK90B,MAC1DyrG,EAAsBz5E,QAAU42E,EAAqB9zE,KAAK90B,MAC1D0rG,EAAsB15E,QAAU82E,EAAqBh0E,KAAK90B,MAExD0oG,EAAmBz7F,MAAMd,WADQ,GAA/BnM,KAAK0hD,UAAUZ,cAA8D,GAAtC9gD,KAAK0hD,UAAUiqD,oBAClB,UAGA,UAIxCtC,EAAqBtxF,MAAM/X,MAE3BgpG,EAAangF,SAAWwgF,EAAqBv0E,KAAK90B,MAClDipG,EAAapgF,SAAWwgF,EAAqBv0E,KAAK90B,MAClDwrG,EAAa3iF,SAAWwgF,EAAqBv0E,KAAK90B,QAWtDJ,EAAQiqG,yBAA2B,SAAUH,EAAuBriG,GAClE,GAAIukG,GAAYlC,EAAsB3hG,MAAM,IACpB,IAApB6jG,EAAUjmG,OACZ3F,KAAK0hD,UAAUkqD,EAAU,IAAMvkG,EAEJ,GAApBukG,EAAUjmG,OACjB3F,KAAK0hD,UAAUkqD,EAAU,IAAIA,EAAU,IAAMvkG,EAElB,GAApBukG,EAAUjmG,SACjB3F,KAAK0hD,UAAUkqD,EAAU,IAAIA,EAAU,IAAIA,EAAU,IAAMvkG,KA6N3D,SAASxH,GAEb,QAASgsG,GAAeC,GACvB,KAAM,IAAIloG,OAAM,uBAAyBkoG,EAAM,MAEhDD,EAAez+F,KAAO,WAAa,UACnCy+F,EAAeE,QAAUF,EACzBhsG,EAAOD,QAAUisG,EACjBA,EAAexrG,GAAK,IAKhB,SAASR,EAAQD,GAQrBA,EAAQ0qG,qBAAuB,WAC7B,GAAI1rF,GAAIC,EAAW8G,EAAU41C,EAAIC,EAAIuvC,EACnCiB,EAAgBhB,EAAOC,EAAOzlG,EAAGomB,EAE/BoxB,EAAQh9C,KAAK4jD,iBACbE,EAAc9jD,KAAK6jD,uBAGnBooD,EAAS,GAAK,EACd7lG,EAAI,EAAI,EAGR64C,EAAej/C,KAAK0hD,UAAUlD,QAAQQ,UAAUC,aAChDitD,EAAkBjtD,CAItB,KAAKz5C,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAS,EAAGH,IAEtC,IADAwlG,EAAQhuD,EAAM8G,EAAYt+C,IACrBomB,EAAIpmB,EAAI,EAAGomB,EAAIk4B,EAAYn+C,OAAQimB,IAAK,CAC3Cq/E,EAAQjuD,EAAM8G,EAAYl4B,IAC1Bm/E,EAAsBC,EAAM3uC,YAAc4uC,EAAM5uC,YAAc,EAE9Dz9C,EAAKqsF,EAAMl5F,EAAIi5F,EAAMj5F,EACrB8M,EAAKosF,EAAMj5F,EAAIg5F,EAAMh5F,EACrB2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAGpB,GAAZ8G,IACFA,EAAW,GAAIzgB,KAAKE,SACpBwZ,EAAK+G,GAGPumF,EAA0C,GAAvBnB,EAA4B9rD,EAAgBA,GAAgB,EAAI8rD,EAAsB/qG,KAAK0hD,UAAUvC,WAAWW,sBACnI,IAAIv6C,GAAI0mG,EAASC,CACF,GAAIA,EAAfvmF,IAEAqmF,EADa,GAAME,EAAjBvmF,EACe,EAGApgB,EAAIogB,EAAWvf,EAIlC4lG,GAA0C,GAAvBjB,EAA4B,EAAI,EAAIA,EAAsB/qG,KAAK0hD,UAAUvC,WAAWU,mBACvGmsD,GAAkC9mG,KAAKwH,IAAIiZ,EAAS,IAAKumF,GAEzD3wC,EAAK38C,EAAKotF,EACVxwC,EAAK38C,EAAKmtF,EACVhB,EAAMzvC,IAAMA,EACZyvC,EAAMxvC,IAAMA,EACZyvC,EAAM1vC,IAAMA,EACZ0vC,EAAMzvC,IAAMA,MAUhB,SAAS37D,EAAQD,GAQrBA,EAAQ0qG,qBAAuB,WAC7B,GAAI1rF,GAAIC,EAAI8G,EAAU41C,EAAIC,EACxBwwC,EAAgBhB,EAAOC,EAAOzlG,EAAGomB,EAE/BoxB,EAAQh9C,KAAK4jD,iBACbE,EAAc9jD,KAAK6jD,uBAGnB5E,EAAej/C,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,YAIhE,KAAKz5C,EAAI,EAAGA,EAAIs+C,EAA
Yn+C,OAAS,EAAGH,IAEtC,IADAwlG,EAAQhuD,EAAM8G,EAAYt+C,IACrBomB,EAAIpmB,EAAI,EAAGomB,EAAIk4B,EAAYn+C,OAAQimB,IAItC,GAHAq/E,EAAQjuD,EAAM8G,EAAYl4B,IAGtBo/E,EAAMptD,OAASqtD,EAAMrtD,MAAO,CAE9Bh/B,EAAKqsF,EAAMl5F,EAAIi5F,EAAMj5F,EACrB8M,EAAKosF,EAAMj5F,EAAIg5F,EAAMh5F,EACrB2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,EAGpC,IAAIstF,GAAY,GAEdH,GADa/sD,EAAXt5B,GACgBzgB,KAAK4uB,IAAIq4E,EAAUxmF,EAAS,GAAKzgB,KAAK4uB,IAAIq4E,EAAUltD,EAAa,GAGlE,EAGD,GAAZt5B,EACFA,EAAW,IAGXqmF,GAAkCrmF,EAEpC41C,EAAK38C,EAAKotF,EACVxwC,EAAK38C,EAAKmtF,EAEVhB,EAAMzvC,IAAMA,EACZyvC,EAAMxvC,IAAMA,EACZyvC,EAAM1vC,IAAMA,EACZ0vC,EAAMzvC,IAAMA,IAYtB57D,EAAQ4qG,mCAAqC,WAS3C,IAAK,GARDK,GAAYn9C,EAAMV,EAClBpuC,EAAIC,EAAI08C,EAAIC,EAAIsvC,EAAanlF,EAC7Bm4B,EAAQ99C,KAAK89C,MAEbd,EAAQh9C,KAAK4jD,iBACbE,EAAc9jD,KAAK6jD,uBAGdr+C,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAQH,IAAK,CAC3C,GAAIwlG,GAAQhuD,EAAM8G,EAAYt+C,GAC9BwlG,GAAMoB,SAAW,EACjBpB,EAAMqB,SAAW,EAKnB,IAAKr/C,IAAUlP,GACb,GAAIA,EAAMh4C,eAAeknD,KACvBU,EAAO5P,EAAMkP,GACTU,EAAKC,WAEH3tD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKkG,OAAS5zD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKiG,SAqBzE,GApBAk3C,EAAan9C,EAAKlP,QAAQK,aAE1BgsD,IAAen9C,EAAKrkC,GAAGgzC,YAAc3O,EAAKtkC,KAAKizC,YAAc,GAAKr8D,KAAK0hD,UAAUvC,WAAWY,WAE5FnhC,EAAM8uC,EAAKtkC,KAAKrX,EAAI27C,EAAKrkC,GAAGtX,EAC5B8M,EAAM6uC,EAAKtkC,KAAKpX,EAAI07C,EAAKrkC,GAAGrX,EAC5B2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAEpB,GAAZ8G,IACFA,EAAW,KAIbmlF,EAAc9qG,KAAK0hD,UAAUlD,QAAQM,gBAAkB+rD,EAAallF,GAAYA,EAEhF41C,EAAK38C,EAAKksF,EACVtvC,EAAK38C,EAAKisF,EAINp9C,EAAKrkC,GAAGu0B,OAAS8P,EAAKtkC,KAAKw0B,MAC7B8P,EAAKrkC,GAAG+iF,UAAY7wC,EACpB7N,EAAKrkC,GAAGgjF,UAAY7wC,EACpB9N,EAAKtkC,KAAKgjF,UAAY7wC,EACtB7N,EAAKtkC,KAAKijF,UAAY7wC,MAEnB,CACH,GAAI7U,GAAS,EACb+G,GAAKrkC,GAAGkyC,IAAM5U,EAAO4U,EACrB7N,EAAKrkC,GAAGmyC,IAAM7U,EAAO6U,EACrB9N,EAAKtkC,KAAKmyC,IAAM5U,EAAO4U,EACvB7N,EAAKtkC,KAAKoyC,IAAM7U,EAAO6U,EAQjC,GACI4wC,GAAUC,EADVvB,EAAc,CAElB,KAAKtlG,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAQH,IAAK,CACvC,GAAIsgD,GAAO9I,EAAM8G,EAAYt+C,GAC7B4mG,GAAWlnG,KAAK4G,IAAIg/F,EAAY5lG,KAAKwH,KAAKo+F,EAAYhlD,EAAKsmD,WAC3DC,EAAWnnG,KAAK4G,IAAIg/F,EAAY5lG,KAAKwH,KAAKo+F,EAAYhlD,EAAKumD,WAE3DvmD,EAAKyV,IAAM6wC,EACXtmD,EAAK0V,IAAM6wC,EAIb,GAAIC,GAAU,EACVC,EAAU,CACd,KAAK/mG,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAQH,IAAK,CACvC,GAAIsgD,GAAO9I,EAAM8G,EAAYt+C,GAC7B8mG,IAAWxmD,EAAKyV,GAChBgxC,GAAWzmD,EAAK0V,GAElB,GAAIgxC,GAAeF,EAAUxoD,EAAYn+C,OACrC8mG,EAAeF,EAAUzoD,EAAYn+C,MAEzC,KAAKH,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAQH,IAAK,CACvC,GAAIsgD,GAAO9I,EAAM8G,EAAYt+C,GAC7BsgD,GAAKyV,IAAMixC,EACX1mD,EAAK0V,IAAMixC,KAOX,SAAS5sG,EAAQD,GAQrBA,EAAQ0qG,qBAAuB,WAC7B,GAA8D,GAA1DtqG,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAA4B,CAC/D,GAAImH,GACA9I,EAAQh9C,KAAK4jD,iBACbE,EAAc9jD,KAAK6jD,uBACnB6oD,EAAY5oD,EAAYn+C,MAE5B3F,MAAK2sG,mBAAmB3vD,EAAM8G,EAK9B,KAAK,GAHDomD,GAAgBlqG,KAAKkqG,cAGhB1kG,EAAI,EAAOknG,EAAJlnG,EAAeA,IAC7BsgD,EAAO9I,EAAM8G,EAAYt+C,IACrBsgD,EAAKr3C,QAAQwuC,KAAO,IAEtBj9C,KAAK4sG,sBAAsB1C,EAAcxqG,KAAKmtG,SAASC,GAAGhnD,GAC1D9lD,KAAK4sG,sBAAsB1C,EAAcxqG,KAAKmtG,SAASE,GAAGjnD,GAC1D9lD,KAAK4sG,sBAAsB1C,EAAcxqG,KAAKmtG,SAASG,GAAGlnD,GAC1D9lD,KAAK4sG,sBAAsB1C,EAAcxqG,KAAKmtG,SAASI,GAAGnnD,MAelElmD,EAAQgtG,sBAAwB,SAASM,EAAapnD,GAEpD,GAAIonD,EAAaC,cAAgB,EAAG,CAClC,GAAIvuF,GAAGC,EAAG8G,CAUV,IAPA/G,EAAKsuF,EAAaE,aAAar7F,EAAI+zC,EAAK/zC,EACxC8M,EAAKquF,EAAaE,aAAap7F,EAAI8zC,EAAK9zC,EACxC2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAKhC8G,EAAWunF,EAAaG,SAAWrtG,KAAK0hD,UAAUlD,QAAQC,UAAUC,cAAe,CAErE,GAAZ/4B,IACFA,EAAW,GAAIzgB,KAAKE,SACpBwZ,EAAK+G,EAEP,IAAIilF,GAAe5qG,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAAwBuuD,EAAajwD,KAAO6I,EAAKr3C,QAAQwuC,MAAQt3B,EAAWA,EAAWA,GACvI41C,EAAK38C
,EAAKgsF,EACVpvC,EAAK38C,EAAK+rF,CACd9kD,GAAKyV,IAAMA,EACXzV,EAAK0V,IAAMA,MAIX,IAAkC,GAA9B0xC,EAAaC,cACfntG,KAAK4sG,sBAAsBM,EAAaL,SAASC,GAAGhnD,GACpD9lD,KAAK4sG,sBAAsBM,EAAaL,SAASE,GAAGjnD,GACpD9lD,KAAK4sG,sBAAsBM,EAAaL,SAASG,GAAGlnD,GACpD9lD,KAAK4sG,sBAAsBM,EAAaL,SAASI,GAAGnnD,OAGpD,IAAIonD,EAAaL,SAASn6F,KAAKrS,IAAMylD,EAAKzlD,GAAI,CAE5B,GAAZslB,IACFA,EAAW,GAAIzgB,KAAKE,SACpBwZ,EAAK+G,EAEP,IAAIilF,GAAe5qG,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAAwBuuD,EAAajwD,KAAO6I,EAAKr3C,QAAQwuC,MAAQt3B,EAAWA,EAAWA,GACvI41C,EAAK38C,EAAKgsF,EACVpvC,EAAK38C,EAAK+rF,CACd9kD,GAAKyV,IAAMA,EACXzV,EAAK0V,IAAMA,KAcrB57D,EAAQ+sG,mBAAqB,SAAS3vD,EAAM8G,GAU1C,IAAK,GATDgC,GACA4mD,EAAY5oD,EAAYn+C,OAExBsgD,EAAO/hD,OAAOopG,UAChBvnD,EAAO7hD,OAAOopG,UACdpnD,GAAOhiD,OAAOopG,UACdtnD,GAAO9hD,OAAOopG,UAGP9nG,EAAI,EAAOknG,EAAJlnG,EAAeA,IAAK,CAClC,GAAIuM,GAAIirC,EAAM8G,EAAYt+C,IAAIuM,EAC1BC,EAAIgrC,EAAM8G,EAAYt+C,IAAIwM,CAC1BgrC,GAAM8G,EAAYt+C,IAAIiJ,QAAQwuC,KAAO,IAC/BgJ,EAAJl0C,IAAYk0C,EAAOl0C,GACnBA,EAAIm0C,IAAQA,EAAOn0C,GACfg0C,EAAJ/zC,IAAY+zC,EAAO/zC,GACnBA,EAAIg0C,IAAQA,EAAOh0C,IAI3B,GAAIu7F,GAAWroG,KAAK2lB,IAAIq7B,EAAOD,GAAQ/gD,KAAK2lB,IAAIm7B,EAAOD,EACnDwnD,GAAW,GAAIxnD,GAAQ,GAAMwnD,EAAUvnD,GAAQ,GAAMunD,IACtCtnD,GAAQ,GAAMsnD,EAAUrnD,GAAQ,GAAMqnD,EAGzD,IAAIC,GAAkB,KAClBC,EAAWvoG,KAAKwH,IAAI8gG,EAAgBtoG,KAAK2lB,IAAIq7B,EAAOD,IACpDynD,EAAe,GAAMD,EACrBxuC,EAAU,IAAOhZ,EAAOC,GAAOgZ,EAAU,IAAOnZ,EAAOC,GAGvDkkD,GACFxqG,MACE0tG,cAAer7F,EAAE,EAAGC,EAAE,GACtBirC,KAAK,EACLxnB,OACEwwB,KAAMgZ,EAAQyuC,EAAaxnD,KAAK+Y,EAAQyuC,EACxC3nD,KAAMmZ,EAAQwuC,EAAa1nD,KAAKkZ,EAAQwuC,GAE1Cr7F,KAAMo7F,EACNJ,SAAU,EAAII,EACdZ,UAAYn6F,KAAK,MACjBgpC,SAAU,EACVkC,MAAO,EACPuvD,cAAe,GAMnB,KAHAntG,KAAK2tG,aAAazD,EAAcxqG,MAG3B8F,EAAI,EAAOknG,EAAJlnG,EAAeA,IACzBsgD,EAAO9I,EAAM8G,EAAYt+C,IACrBsgD,EAAKr3C,QAAQwuC,KAAO,GACtBj9C,KAAK4tG,aAAa1D,EAAcxqG,KAAKomD,EAKzC9lD,MAAKkqG,cAAgBA,GAWvBtqG,EAAQiuG,kBAAoB,SAASX,EAAcpnD,GACjD,GAAIgoD,GAAYZ,EAAajwD,KAAO6I,EAAKr3C,QAAQwuC,KAC7C8wD,EAAe,EAAED,CAErBZ,GAAaE,aAAar7F,EAAIm7F,EAAaE,aAAar7F,EAAIm7F,EAAajwD,KAAO6I,EAAK/zC,EAAI+zC,EAAKr3C,QAAQwuC,KACtGiwD,EAAaE,aAAar7F,GAAKg8F,EAE/Bb,EAAaE,aAAap7F,EAAIk7F,EAAaE,aAAap7F,EAAIk7F,EAAajwD,KAAO6I,EAAK9zC,EAAI8zC,EAAKr3C,QAAQwuC,KACtGiwD,EAAaE,aAAap7F,GAAK+7F,EAE/Bb,EAAajwD,KAAO6wD,CACpB,IAAIE,GAAc9oG,KAAKwH,IAAIxH,KAAKwH,IAAIo5C,EAAKtzC,OAAOszC,EAAKr6B,QAAQq6B,EAAKvzC,MAClE26F,GAAaxxD,SAAYwxD,EAAaxxD,SAAWsyD,EAAeA,EAAcd,EAAaxxD,UAa7F97C,EAAQguG,aAAe,SAASV,EAAapnD,EAAKmoD,IAC1B,GAAlBA,GAA6CznG,SAAnBynG,IAE5BjuG,KAAK6tG,kBAAkBX,EAAapnD,GAGlConD,EAAaL,SAASC,GAAGr3E,MAAMywB,KAAOJ,EAAK/zC,EACzCm7F,EAAaL,SAASC,GAAGr3E,MAAMuwB,KAAOF,EAAK9zC,EAC7ChS,KAAKkuG,eAAehB,EAAapnD,EAAK,MAGtC9lD,KAAKkuG,eAAehB,EAAapnD,EAAK,MAIpConD,EAAaL,SAASC,GAAGr3E,MAAMuwB,KAAOF,EAAK9zC,EAC7ChS,KAAKkuG,eAAehB,EAAapnD,EAAK,MAGtC9lD,KAAKkuG,eAAehB,EAAapnD,EAAK,OAc5ClmD,EAAQsuG,eAAiB,SAAShB,EAAapnD,EAAKqoD,GAClD,OAAQjB,EAAaL,SAASsB,GAAQhB,eACpC,IAAK,GACHD,EAAaL,SAASsB,GAAQtB,SAASn6F,KAAOozC,EAC9ConD,EAAaL,SAASsB,GAAQhB,cAAgB,EAC9CntG,KAAK6tG,kBAAkBX,EAAaL,SAASsB,GAAQroD,EACrD,MACF,KAAK,GAGConD,EAAaL,SAASsB,GAAQtB,SAASn6F,KAAKX,GAAK+zC,EAAK/zC,GACtDm7F,EAAaL,SAASsB,GAAQtB,SAASn6F,KAAKV,GAAK8zC,EAAK9zC,GACxD8zC,EAAK/zC,GAAK7M,KAAKE,SACf0gD,EAAK9zC,GAAK9M,KAAKE,WAGfpF,KAAK2tG,aAAaT,EAAaL,SAASsB,IACxCnuG,KAAK4tG,aAAaV,EAAaL,SAASsB,GAAQroD,GAElD,MACF,KAAK,GACH9lD,KAAK4tG,aAAaV,EAAaL,SAASsB,GAAQroD,KAatDlmD,EAAQ+tG,aAAe,SAAST,GAE9B,GAAIkB,GAAgB,IACc,IAA9BlB,EAAaC,gBACfiB,EAAgBlB,EAAaL,SAASn6F,KACtCw6F,EAAajwD,KAAO,EAAGiwD,EAAaE,aAAar7F,EAAI,EAAGm7F,EAAaE,aAAap7F,EAAI,GAExFk7F,EAAaC,cAAgB,EAC7BD,EAAaL,SAASn6F,KAAO,KAC7B1S,KAAKquG,c
AAcnB,EAAa,MAChCltG,KAAKquG,cAAcnB,EAAa,MAChCltG,KAAKquG,cAAcnB,EAAa,MAChCltG,KAAKquG,cAAcnB,EAAa,MAEX,MAAjBkB,GACFpuG,KAAK4tG,aAAaV,EAAakB,IAenCxuG,EAAQyuG,cAAgB,SAASnB,EAAciB,GAC7C,GAAIloD,GAAKC,EAAKH,EAAKC,EACfsoD,EAAY,GAAMpB,EAAa76F,IACnC,QAAQ87F,GACN,IAAK,KACHloD,EAAOinD,EAAaz3E,MAAMwwB,KAC1BC,EAAOgnD,EAAaz3E,MAAMwwB,KAAOqoD,EACjCvoD,EAAOmnD,EAAaz3E,MAAMswB,KAC1BC,EAAOknD,EAAaz3E,MAAMswB,KAAOuoD,CACjC,MACF,KAAK,KACHroD,EAAOinD,EAAaz3E,MAAMwwB,KAAOqoD,EACjCpoD,EAAOgnD,EAAaz3E,MAAMywB,KAC1BH,EAAOmnD,EAAaz3E,MAAMswB,KAC1BC,EAAOknD,EAAaz3E,MAAMswB,KAAOuoD,CACjC,MACF,KAAK,KACHroD,EAAOinD,EAAaz3E,MAAMwwB,KAC1BC,EAAOgnD,EAAaz3E,MAAMwwB,KAAOqoD,EACjCvoD,EAAOmnD,EAAaz3E,MAAMswB,KAAOuoD,EACjCtoD,EAAOknD,EAAaz3E,MAAMuwB,IAC1B,MACF,KAAK,KACHC,EAAOinD,EAAaz3E,MAAMwwB,KAAOqoD,EACjCpoD,EAAOgnD,EAAaz3E,MAAMywB,KAC1BH,EAAOmnD,EAAaz3E,MAAMswB,KAAOuoD,EACjCtoD,EAAOknD,EAAaz3E,MAAMuwB,KAK9BknD,EAAaL,SAASsB,IACpBf,cAAcr7F,EAAE,EAAEC,EAAE,GACpBirC,KAAK,EACLxnB,OAAOwwB,KAAKA,EAAKC,KAAKA,EAAKH,KAAKA,EAAKC,KAAKA,GAC1C3zC,KAAM,GAAM66F,EAAa76F,KACzBg7F,SAAU,EAAIH,EAAaG,SAC3BR,UAAWn6F,KAAK,MAChBgpC,SAAU,EACVkC,MAAOsvD,EAAatvD,MAAM,EAC1BuvD,cAAe,IAYnBvtG,EAAQ2uG,UAAY,SAASxnF,EAAI5b,GACJ3E,SAAvBxG,KAAKkqG,gBAEPnjF,EAAIO,UAAY,EAEhBtnB,KAAKwuG,YAAYxuG,KAAKkqG,cAAcxqG,KAAKqnB,EAAI5b,KAajDvL,EAAQ4uG,YAAc,SAASC,EAAO1nF,EAAI5b,GAC1B3E,SAAV2E,IACFA,EAAQ,WAGkB,GAAxBsjG,EAAOtB,gBACTntG,KAAKwuG,YAAYC,EAAO5B,SAASC,GAAG/lF,GACpC/mB,KAAKwuG,YAAYC,EAAO5B,SAASE,GAAGhmF,GACpC/mB,KAAKwuG,YAAYC,EAAO5B,SAASI,GAAGlmF,GACpC/mB,KAAKwuG,YAAYC,EAAO5B,SAASG,GAAGjmF,IAEtCA,EAAIY,YAAcxc,EAClB4b,EAAIa,YACJb,EAAIc,OAAO4mF,EAAOh5E,MAAMwwB,KAAKwoD,EAAOh5E,MAAMswB,MAC1Ch/B,EAAIe,OAAO2mF,EAAOh5E,MAAMywB,KAAKuoD,EAAOh5E,MAAMswB,MAC1Ch/B,EAAIlH,SAEJkH,EAAIa,YACJb,EAAIc,OAAO4mF,EAAOh5E,MAAMywB,KAAKuoD,EAAOh5E,MAAMswB,MAC1Ch/B,EAAIe,OAAO2mF,EAAOh5E,MAAMywB,KAAKuoD,EAAOh5E,MAAMuwB,MAC1Cj/B,EAAIlH,SAEJkH,EAAIa,YACJb,EAAIc,OAAO4mF,EAAOh5E,MAAMywB,KAAKuoD,EAAOh5E,MAAMuwB,MAC1Cj/B,EAAIe,OAAO2mF,EAAOh5E,MAAMwwB,KAAKwoD,EAAOh5E,MAAMuwB,MAC1Cj/B,EAAIlH,SAEJkH,EAAIa,YACJb,EAAIc,OAAO4mF,EAAOh5E,MAAMwwB,KAAKwoD,EAAOh5E,MAAMuwB,MAC1Cj/B,EAAIe,OAAO2mF,EAAOh5E,MAAMwwB,KAAKwoD,EAAOh5E,MAAMswB,MAC1Ch/B,EAAIlH,WAaF,SAAShgB,GAEbA,EAAOD,QAAU,SAASC,GAQzB,MAPIA,GAAO6uG,kBACV7uG,EAAOqwE,UAAY,aACnBrwE,EAAO8uG,SAEP9uG,EAAOgtG,YACPhtG,EAAO6uG,gBAAkB,GAEnB7uG,IAMJ,SAASA,EAAQD,IAEO,SAASgvG,GAA0B/uG,EAAOD,QAAUgvG,IAEnDruG,KAAKX"} \ No newline at end of file diff --git a/core/src/main/resources/org/apache/spark/ui/static/vis.min.js b/core/src/main/resources/org/apache/spark/ui/static/vis.min.js index 4af2c818c27a6..2b3b1d60463f7 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/vis.min.js +++ b/core/src/main/resources/org/apache/spark/ui/static/vis.min.js @@ -36,4 +36,3 @@ return!e&&this._longDateFormat[t.toUpperCase()]&&(e=this._longDateFormat[t.toUpp },e.time,this);else if(t.eventType&ke)return pi;return mi},reset:function(){clearTimeout(this._timer)},emit:function(t){this.state===pi&&(t&&t.eventType&ke?this.manager.emit(this.options.event+"up",t):(this._input.timeStamp=ve(),this.manager.emit(this.options.event,this._input)))}}),u(ne,ee,{defaults:{event:"rotate",threshold:0,pointers:2},getTouchAction:function(){return[ni]},attrTest:function(t){return this._super.attrTest.call(this,t)&&(Math.abs(t.rotation)>this.options.threshold||this.state&di)}}),u(re,ee,{defaults:{event:"swipe",threshold:10,velocity:.65,direction:Re|Fe,pointers:1},getTouchAction:function(){return ie.prototype.getTouchAction.call(this)},attrTest:function(t){var e,i=this.options.direction;return 
i&(Re|Fe)?e=t.velocity:i&Re?e=t.velocityX:i&Fe&&(e=t.velocityY),this._super.attrTest.call(this,t)&&i&t.direction&&t.distance>this.options.threshold&&ge(e)>this.options.velocity&&t.eventType&ke},emit:function(t){var e=J(t.direction);e&&this.manager.emit(this.options.event+e,t),this.manager.emit(this.options.event,t)}}),u(ae,K,{defaults:{event:"tap",pointers:1,taps:1,interval:300,time:250,threshold:2,posThreshold:10},getTouchAction:function(){return[oi]},process:function(t){var e=this.options,i=t.pointers.length===e.pointers,s=t.distance<e.threshold,o=t.deltaTime<e.time;if(this.reset(),t.eventType&Oe&&0===this.count)return this.failTimeout();if(s&&o&&i){if(t.eventType!=ke)return this.failTimeout();var n=this.pTime?t.timeStamp-this.pTime<e.interval:!0,r=!this.pCenter||F(this.pCenter,t.center)<e.posThreshold;this.pTime=t.timeStamp,this.pCenter=t.center,r&&n?this.count+=1:this.count=1,this._input=t;var a=this.count%e.taps;if(0===a)return this.hasRequireFailures()?(this._timer=h(function(){this.state=pi,this.tryEmit()},e.interval,this),di):pi}return mi},failTimeout:function(){return this._timer=h(function(){this.state=mi},this.options.interval,this),mi},reset:function(){clearTimeout(this._timer)},emit:function(){this.state==pi&&(this._input.tapCount=this.count,this.manager.emit(this.options.event,this._input))}}),he.VERSION="2.0.4",he.defaults={domEvents:!1,touchAction:ii,enable:!0,inputTarget:null,inputClass:null,preset:[[ne,{enable:!1}],[se,{enable:!1},["rotate"]],[re,{direction:Re}],[ie,{direction:Re},["swipe"]],[ae],[ae,{event:"doubletap",taps:2},["tap"]],[oe]],cssProps:{userSelect:"none",touchSelect:"none",touchCallout:"none",contentZooming:"none",userDrag:"none",tapHighlightColor:"rgba(0,0,0,0)"}};var fi=1,gi=2;de.prototype={set:function(t){return c(this.options,t),t.touchAction&&this.touchAction.update(),t.inputTarget&&(this.input.destroy(),this.input.target=t.inputTarget,this.input.init()),this},stop:function(t){this.session.stopped=t?gi:fi},recognize:function(t){var e=this.session;if(!e.stopped){this.touchAction.preventDefaults(t);var i,s=this.recognizers,o=e.curRecognizer;(!o||o&&o.state&pi)&&(o=e.curRecognizer=null);for(var n=0;n<s.length;)i=s[n],e.stopped===gi||o&&i!=o&&!i.canRecognizeWith(o)?i.reset():i.recognize(t),!o&&i.state&(di|li|ci)&&(o=e.curRecognizer=i),n++}},get:function(t){if(t instanceof K)return t;for(var e=this.recognizers,i=0;i<e.length;i++)if(e[i].options.event==t)return e[i];return null},add:function(t){if(d(t,"add",this))return this;var e=this.get(t.options.event);return e&&this.remove(e),this.recognizers.push(t),t.manager=this,this.touchAction.update(),t},remove:function(t){if(d(t,"remove",this))return this;var e=this.recognizers;return t=this.get(t),e.splice(w(e,t),1),this.touchAction.update(),this},on:function(t,e){var i=this.handlers;return l(x(t),function(t){i[t]=i[t]||[],i[t].push(e)}),this},off:function(t,e){var i=this.handlers;return l(x(t),function(t){e?i[t].splice(w(i[t],e),1):delete i[t]}),this},emit:function(t,e){this.options.domEvents&&ce(t,e);var i=this.handlers[t]&&this.handlers[t].slice();if(i&&i.length){e.type=t,e.preventDefault=function(){e.srcEvent.preventDefault()};for(var 
s=0;s<i.length;)i[s](e),s++}},destroy:function(){this.element&&le(this,!1),this.handlers={},this.session={},this.input.destroy(),this.element=null}},c(he,{INPUT_START:Oe,INPUT_MOVE:Ee,INPUT_END:ke,INPUT_CANCEL:Ne,STATE_POSSIBLE:hi,STATE_BEGAN:di,STATE_CHANGED:li,STATE_ENDED:ci,STATE_RECOGNIZED:pi,STATE_CANCELLED:ui,STATE_FAILED:mi,DIRECTION_NONE:Ie,DIRECTION_LEFT:Le,DIRECTION_RIGHT:ze,DIRECTION_UP:Pe,DIRECTION_DOWN:Ae,DIRECTION_HORIZONTAL:Re,DIRECTION_VERTICAL:Fe,DIRECTION_ALL:He,Manager:de,Input:O,TouchAction:Z,TouchInput:V,MouseInput:W,PointerEventInput:G,TouchMouseInput:q,SingleTouchInput:j,Recognizer:K,AttrRecognizer:ee,Tap:ae,Pan:ie,Swipe:re,Pinch:se,Rotate:ne,Press:oe,on:v,off:y,each:l,merge:p,extend:c,inherit:u,bindFn:m,prefixed:D}),"function"==me&&i(73)?(s=function(){return he}.call(e,i,e,t),!(s!==a&&(t.exports=s))):"undefined"!=typeof t&&t.exports?t.exports=he:o[r]=he}(window,document,"Hammer")},function(t,e){e.startWithClustering=function(){this.clusterToFit(this.constants.clustering.initialMaxNodes,!0),this.updateLabels(),this.stabilize&&this._stabilize(),this.start()},e.clusterToFit=function(t,e){for(var i=this.nodeIndices.length,s=50,o=0;i>t&&s>o;)o%3==0?(this.forceAggregateHubs(!0),this.normalizeClusterLevels()):this.increaseClusterLevel(),i=this.nodeIndices.length,o+=1;o>0&&1==e&&this.repositionNodes(),this._updateCalculationNodes()},e.openCluster=function(t){var e=this.moving;if(t.clusterSize>this.constants.clustering.sectorThreshold&&this._nodeInActiveArea(t)&&("default"!=this._sector()||1!=this.nodeIndices.length)){this._addSector(t);for(var i=0;this.nodeIndices.length<this.constants.clustering.initialMaxNodes&&10>i;)this.decreaseClusterLevel(),i+=1}else this._expandClusterNode(t,!1,!0),this._updateNodeIndexList(),this._updateDynamicEdges(),this._updateCalculationNodes(),this.updateLabels();this.moving!=e&&this.start()},e.updateClustersDefault=function(){1==this.constants.clustering.enabled&&this.updateClusters(0,!1,!1)},e.increaseClusterLevel=function(){this.updateClusters(-1,!1,!0)},e.decreaseClusterLevel=function(){this.updateClusters(1,!1,!0)},e.updateClusters=function(t,e,i,s){var o=this.moving,n=this.nodeIndices.length;this.previousScale>this.scale&&0==t&&this._collapseSector(),this.previousScale>this.scale||-1==t?this._formClusters(i):(this.previousScale<this.scale||1==t)&&(1==i?this._openClusters(e,i):this._openClustersBySize()),this._updateNodeIndexList(),this.nodeIndices.length==n&&(this.previousScale>this.scale||-1==t)&&(this._aggregateHubs(i),this._updateNodeIndexList()),(this.previousScale>this.scale||-1==t)&&(this.handleChains(),this._updateNodeIndexList()),this.previousScale=this.scale,this._updateDynamicEdges(),this.updateLabels(),this.nodeIndices.length<n&&(this.clusterSession+=1,this.normalizeClusterLevels()),(0==s||void 0===s)&&this.moving!=o&&this.start(),this._updateCalculationNodes()},e.handleChains=function(){var t=this._getChainFraction();t>this.constants.clustering.chainThreshold&&this._reduceAmountOfChains(1-this.constants.clustering.chainThreshold/t)},e._aggregateHubs=function(t){this._getHubSize(),this._formClustersByHub(t,!1)},e.forceAggregateHubs=function(t){var e=this.moving,i=this.nodeIndices.length;this._aggregateHubs(!0),this._updateNodeIndexList(),this._updateDynamicEdges(),this.updateLabels(),this.nodeIndices.length!=i&&(this.clusterSession+=1),(0==t||void 0===t)&&this.moving!=e&&this.start()},e._openClustersBySize=function(){for(var t in this.nodes)if(this.nodes.hasOwnProperty(t)){var 
e=this.nodes[t];1==e.inView()&&(e.width*this.scale>this.constants.clustering.screenSizeThreshold*this.frame.canvas.clientWidth||e.height*this.scale>this.constants.clustering.screenSizeThreshold*this.frame.canvas.clientHeight)&&this.openCluster(e)}},e._openClusters=function(t,e){for(var i=0;i<this.nodeIndices.length;i++){var s=this.nodes[this.nodeIndices[i]];this._expandClusterNode(s,t,e),this._updateCalculationNodes()}},e._expandClusterNode=function(t,e,i,s){if(t.clusterSize>1&&(t.clusterSize<this.constants.clustering.sectorThreshold&&(s=!0),e=s?!0:e,t.formationScale<this.scale||1==i))for(var o in t.containedNodes)if(t.containedNodes.hasOwnProperty(o)){var n=t.containedNodes[o];1==i?(n.clusterSession==t.clusterSessions[t.clusterSessions.length-1]||s)&&this._expelChildFromParent(t,o,e,i,s):this._nodeInActiveArea(t)&&this._expelChildFromParent(t,o,e,i,s)}},e._expelChildFromParent=function(t,e,i,s,o){var n=t.containedNodes[e];if(n.formationScale<this.scale||1==s){this._unselectAll(),this.nodes[e]=n,this._releaseContainedEdges(t,n),this._connectEdgeBackToChild(t,n),this._validateEdges(t),t.options.mass-=n.options.mass,t.clusterSize-=n.clusterSize,t.options.fontSize=Math.min(this.constants.clustering.maxFontSize,this.constants.nodes.fontSize+this.constants.clustering.fontSizeMultiplier*(t.clusterSize-1)),t.dynamicEdgesLength=t.dynamicEdges.length,n.x=t.x+t.growthIndicator*(.5-Math.random()),n.y=t.y+t.growthIndicator*(.5-Math.random()),delete t.containedNodes[e];var r=!1;for(var a in t.containedNodes)if(t.containedNodes.hasOwnProperty(a)&&t.containedNodes[a].clusterSession==n.clusterSession){r=!0;break}0==r&&t.clusterSessions.pop(),this._repositionBezierNodes(n),n.clusterSession=0,t.clearSizeCache(),this.moving=!0}1==i&&this._expandClusterNode(n,i,s,o)},e._repositionBezierNodes=function(t){for(var e=0;e<t.dynamicEdges.length;e++)t.dynamicEdges[e].positionBezierNode()},e._formClusters=function(t){0==t?this._formClustersByZoom():this._forceClustersByZoom()},e._formClustersByZoom=function(){var t,e,i,s=this.constants.clustering.clusterEdgeThreshold/this.scale;for(var o in this.edges)if(this.edges.hasOwnProperty(o)){var n=this.edges[o];if(n.connected&&n.toId!=n.fromId&&(t=n.to.x-n.from.x,e=n.to.y-n.from.y,i=Math.sqrt(t*t+e*e),s>i)){var r=n.from,a=n.to;n.to.options.mass>n.from.options.mass&&(r=n.to,a=n.from),1==a.dynamicEdgesLength?this._addToCluster(r,a,!1):1==r.dynamicEdgesLength&&this._addToCluster(a,r,!1)}}},e._forceClustersByZoom=function(){for(var t in this.nodes)if(this.nodes.hasOwnProperty(t)){var e=this.nodes[t];if(1==e.dynamicEdgesLength&&0!=e.dynamicEdges.length){var i=e.dynamicEdges[0],s=i.toId==e.id?this.nodes[i.fromId]:this.nodes[i.toId];e.id!=s.id&&(s.options.mass>e.options.mass?this._addToCluster(s,e,!0):this._addToCluster(e,s,!0))}}},e._clusterToSmallestNeighbour=function(t){for(var e=-1,i=null,s=0;s<t.dynamicEdges.length;s++)if(void 0!==t.dynamicEdges[s]){var o=null;t.dynamicEdges[s].fromId!=t.id?o=t.dynamicEdges[s].from:t.dynamicEdges[s].toId!=t.id&&(o=t.dynamicEdges[s].to),null!=o&&e>o.clusterSessions.length&&(e=o.clusterSessions.length,i=o)}null!=o&&void 0!==this.nodes[o.id]&&this._addToCluster(o,t,!0)},e._formClustersByHub=function(t,e){for(var i in this.nodes)this.nodes.hasOwnProperty(i)&&this._formClusterFromHub(this.nodes[i],t,e)},e._formClusterFromHub=function(t,e,i,s){if(void 0===s&&(s=0),t.dynamicEdgesLength>=this.hubThreshold&&0==i||t.dynamicEdgesLength==this.hubThreshold&&1==i){for(var 
o,n,r,a=this.constants.clustering.clusterEdgeThreshold/this.scale,h=!1,d=[],l=t.dynamicEdges.length,c=0;l>c;c++)d.push(t.dynamicEdges[c].id);if(0==e)for(h=!1,c=0;l>c;c++){var p=this.edges[d[c]];if(void 0!==p&&p.connected&&p.toId!=p.fromId&&(o=p.to.x-p.from.x,n=p.to.y-p.from.y,r=Math.sqrt(o*o+n*n),a>r)){h=!0;break}}if(!e&&h||e)for(c=0;l>c;c++)if(p=this.edges[d[c]],void 0!==p){var u=this.nodes[p.fromId==t.id?p.toId:p.fromId];u.dynamicEdges.length<=this.hubThreshold+s&&u.id!=t.id&&this._addToCluster(t,u,e)}}},e._addToCluster=function(t,e,i){t.containedNodes[e.id]=e;for(var s=0;s<e.dynamicEdges.length;s++){var o=e.dynamicEdges[s];o.toId==t.id||o.fromId==t.id?this._addToContainedEdges(t,e,o):this._connectEdgeToCluster(t,e,o)}e.dynamicEdges=[],this._containCircularEdgesFromNode(t,e),delete this.nodes[e.id];var n=t.options.mass;e.clusterSession=this.clusterSession,t.options.mass+=e.options.mass,t.clusterSize+=e.clusterSize,t.options.fontSize=Math.min(this.constants.clustering.maxFontSize,this.constants.nodes.fontSize+this.constants.clustering.fontSizeMultiplier*t.clusterSize),t.clusterSessions[t.clusterSessions.length-1]!=this.clusterSession&&t.clusterSessions.push(this.clusterSession),t.formationScale=1==i?0:this.scale,t.clearSizeCache(),t.containedNodes[e.id].formationScale=t.formationScale,e.clearVelocity(),t.updateVelocity(n),this.moving=!0},e._updateDynamicEdges=function(){for(var t=0;t<this.nodeIndices.length;t++){var e=this.nodes[this.nodeIndices[t]];e.dynamicEdgesLength=e.dynamicEdges.length;var i=0;if(e.dynamicEdgesLength>1)for(var s=0;s<e.dynamicEdgesLength-1;s++)for(var o=e.dynamicEdges[s].toId,n=e.dynamicEdges[s].fromId,r=s+1;r<e.dynamicEdgesLength;r++)(e.dynamicEdges[r].toId==o&&e.dynamicEdges[r].fromId==n||e.dynamicEdges[r].fromId==o&&e.dynamicEdges[r].toId==n)&&(i+=1);e.dynamicEdgesLength-=i}},e._addToContainedEdges=function(t,e,i){t.containedEdges.hasOwnProperty(e.id)||(t.containedEdges[e.id]=[]),t.containedEdges[e.id].push(i),delete this.edges[i.id];for(var s=0;s<t.dynamicEdges.length;s++)if(t.dynamicEdges[s].id==i.id){t.dynamicEdges.splice(s,1);break}},e._connectEdgeToCluster=function(t,e,i){i.toId==i.fromId?this._addToContainedEdges(t,e,i):(i.toId==e.id?(i.originalToId.push(e.id),i.to=t,i.toId=t.id):(i.originalFromId.push(e.id),i.from=t,i.fromId=t.id),this._addToReroutedEdges(t,e,i))},e._containCircularEdgesFromNode=function(t,e){for(var i=0;i<t.dynamicEdges.length;i++){var s=t.dynamicEdges[i];s.toId==s.fromId&&this._addToContainedEdges(t,e,s)}},e._addToReroutedEdges=function(t,e,i){t.reroutedEdges.hasOwnProperty(e.id)||(t.reroutedEdges[e.id]=[]),t.reroutedEdges[e.id].push(i),t.dynamicEdges.push(i)},e._connectEdgeBackToChild=function(t,e){if(t.reroutedEdges.hasOwnProperty(e.id)){for(var i=0;i<t.reroutedEdges[e.id].length;i++){var s=t.reroutedEdges[e.id][i];s.originalFromId[s.originalFromId.length-1]==e.id?(s.originalFromId.pop(),s.fromId=e.id,s.from=e):(s.originalToId.pop(),s.toId=e.id,s.to=e),e.dynamicEdges.push(s);for(var o=0;o<t.dynamicEdges.length;o++)if(t.dynamicEdges[o].id==s.id){t.dynamicEdges.splice(o,1);break}}delete t.reroutedEdges[e.id]}},e._validateEdges=function(t){for(var e=0;e<t.dynamicEdges.length;e++){var i=t.dynamicEdges[e];t.id!=i.toId&&t.id!=i.fromId&&t.dynamicEdges.splice(e,1)}},e._releaseContainedEdges=function(t,e){for(var i=0;i<t.containedEdges[e.id].length;i++){var s=t.containedEdges[e.id][i];this.edges[s.id]=s,e.dynamicEdges.push(s),t.dynamicEdges.push(s)}delete t.containedEdges[e.id]},e.updateLabels=function(){var t;for(t in 
this.nodes)if(this.nodes.hasOwnProperty(t)){var e=this.nodes[t];e.clusterSize>1&&(e.label="[".concat(String(e.clusterSize),"]"))}for(t in this.nodes)this.nodes.hasOwnProperty(t)&&(e=this.nodes[t],1==e.clusterSize&&(e.label=void 0!==e.originalLabel?e.originalLabel:String(e.id)))},e.normalizeClusterLevels=function(){var t,e=0,i=1e9,s=0;for(t in this.nodes)this.nodes.hasOwnProperty(t)&&(s=this.nodes[t].clusterSessions.length,s>e&&(e=s),i>s&&(i=s));if(e-i>this.constants.clustering.clusterLevelDifference){var o=this.nodeIndices.length,n=e-this.constants.clustering.clusterLevelDifference;for(t in this.nodes)this.nodes.hasOwnProperty(t)&&this.nodes[t].clusterSessions.length<n&&this._clusterToSmallestNeighbour(this.nodes[t]);this._updateNodeIndexList(),this._updateDynamicEdges(),this.nodeIndices.length!=o&&(this.clusterSession+=1)}},e._nodeInActiveArea=function(t){return Math.abs(t.x-this.areaCenter.x)<=this.constants.clustering.activeAreaBoxSize/this.scale&&Math.abs(t.y-this.areaCenter.y)<=this.constants.clustering.activeAreaBoxSize/this.scale},e.repositionNodes=function(){for(var t=0;t<this.nodeIndices.length;t++){var e=this.nodes[this.nodeIndices[t]];if(0==e.xFixed||0==e.yFixed){var i=1*this.nodeIndices.length*Math.min(100,e.options.mass),s=2*Math.PI*Math.random();0==e.xFixed&&(e.x=i*Math.cos(s)),0==e.yFixed&&(e.y=i*Math.sin(s)),this._repositionBezierNodes(e)}}},e._getHubSize=function(){for(var t=0,e=0,i=0,s=0,o=0;o<this.nodeIndices.length;o++){var n=this.nodes[this.nodeIndices[o]];n.dynamicEdgesLength>s&&(s=n.dynamicEdgesLength),t+=n.dynamicEdgesLength,e+=Math.pow(n.dynamicEdgesLength,2),i+=1}t/=i,e/=i;var r=e-Math.pow(t,2),a=Math.sqrt(r);this.hubThreshold=Math.floor(t+2*a),this.hubThreshold>s&&(this.hubThreshold=s)},e._reduceAmountOfChains=function(t){this.hubThreshold=2;var e=Math.floor(this.nodeIndices.length*t);for(var i in this.nodes)this.nodes.hasOwnProperty(i)&&2==this.nodes[i].dynamicEdgesLength&&this.nodes[i].dynamicEdges.length>=2&&e>0&&(this._formClusterFromHub(this.nodes[i],!0,!0,1),e-=1)},e._getChainFraction=function(){var t=0,e=0;for(var i in this.nodes)this.nodes.hasOwnProperty(i)&&(2==this.nodes[i].dynamicEdgesLength&&this.nodes[i].dynamicEdges.length>=2&&(t+=1),e+=1);return t/e}},function(t,e,i){var s=i(1),o=i(40);e._putDataInSector=function(){this.sectors.active[this._sector()].nodes=this.nodes,this.sectors.active[this._sector()].edges=this.edges,this.sectors.active[this._sector()].nodeIndices=this.nodeIndices},e._switchToSector=function(t,e){void 0===e||"active"==e?this._switchToActiveSector(t):this._switchToFrozenSector(t)},e._switchToActiveSector=function(t){this.nodeIndices=this.sectors.active[t].nodeIndices,this.nodes=this.sectors.active[t].nodes,this.edges=this.sectors.active[t].edges},e._switchToSupportSector=function(){this.nodeIndices=this.sectors.support.nodeIndices,this.nodes=this.sectors.support.nodes,this.edges=this.sectors.support.edges},e._switchToFrozenSector=function(t){this.nodeIndices=this.sectors.frozen[t].nodeIndices,this.nodes=this.sectors.frozen[t].nodes,this.edges=this.sectors.frozen[t].edges},e._loadLatestSector=function(){this._switchToSector(this._sector())},e._sector=function(){return this.activeSector[this.activeSector.length-1]},e._previousSector=function(){if(this.activeSector.length>1)return this.activeSector[this.activeSector.length-2];throw new TypeError("there are not enough sectors in the this.activeSector 
array.")},e._setActiveSector=function(t){this.activeSector.push(t)},e._forgetLastSector=function(){this.activeSector.pop()},e._createNewSector=function(t){this.sectors.active[t]={nodes:{},edges:{},nodeIndices:[],formationScale:this.scale,drawingNode:void 0},this.sectors.active[t].drawingNode=new o({id:t,color:{background:"#eaefef",border:"495c5e"}},{},{},this.constants),this.sectors.active[t].drawingNode.clusterSize=2},e._deleteActiveSector=function(t){delete this.sectors.active[t]},e._deleteFrozenSector=function(t){delete this.sectors.frozen[t]},e._freezeSector=function(t){this.sectors.frozen[t]=this.sectors.active[t],this._deleteActiveSector(t)},e._activateSector=function(t){this.sectors.active[t]=this.sectors.frozen[t],this._deleteFrozenSector(t)},e._mergeThisWithFrozen=function(t){for(var e in this.nodes)this.nodes.hasOwnProperty(e)&&(this.sectors.frozen[t].nodes[e]=this.nodes[e]);for(var i in this.edges)this.edges.hasOwnProperty(i)&&(this.sectors.frozen[t].edges[i]=this.edges[i]);for(var s=0;s<this.nodeIndices.length;s++)this.sectors.frozen[t].nodeIndices.push(this.nodeIndices[s])},e._collapseThisToSingleCluster=function(){this.clusterToFit(1,!1)},e._addSector=function(t){var e=this._sector();delete this.nodes[t.id];var i=s.randomUUID();this._freezeSector(e),this._createNewSector(i),this._setActiveSector(i),this._switchToSector(this._sector()),this.nodes[t.id]=t},e._collapseSector=function(){var t=this._sector();if("default"!=t&&(1==this.nodeIndices.length||this.sectors.active[t].drawingNode.width*this.scale<this.constants.clustering.screenSizeThreshold*this.frame.canvas.clientWidth||this.sectors.active[t].drawingNode.height*this.scale<this.constants.clustering.screenSizeThreshold*this.frame.canvas.clientHeight)){var e=this._previousSector();this._collapseThisToSingleCluster(),this._mergeThisWithFrozen(e),this._deleteActiveSector(t),this._activateSector(e),this._switchToSector(e),this._forgetLastSector(),this._updateNodeIndexList(),this._updateCalculationNodes()}},e._doInAllActiveSectors=function(t,e){var i=[];if(void 0===e)for(var s in this.sectors.active)this.sectors.active.hasOwnProperty(s)&&(this._switchToActiveSector(s),i.push(this[t]()));else for(var s in this.sectors.active)if(this.sectors.active.hasOwnProperty(s)){this._switchToActiveSector(s);var o=Array.prototype.splice.call(arguments,1);i.push(o.length>1?this[t](o[0],o[1]):this[t](e))}return this._loadLatestSector(),i},e._doInSupportSector=function(t,e){var i=!1;if(void 0===e)this._switchToSupportSector(),i=this[t]();else{this._switchToSupportSector();var s=Array.prototype.splice.call(arguments,1);i=s.length>1?this[t](s[0],s[1]):this[t](e)}return this._loadLatestSector(),i},e._doInAllFrozenSectors=function(t,e){if(void 0===e)for(var i in this.sectors.frozen)this.sectors.frozen.hasOwnProperty(i)&&(this._switchToFrozenSector(i),this[t]());else for(var i in this.sectors.frozen)if(this.sectors.frozen.hasOwnProperty(i)){this._switchToFrozenSector(i);var s=Array.prototype.splice.call(arguments,1);s.length>1?this[t](s[0],s[1]):this[t](e)}this._loadLatestSector()},e._doInAllSectors=function(t,e){var i=Array.prototype.splice.call(arguments,1);void 0===e?(this._doInAllActiveSectors(t),this._doInAllFrozenSectors(t)):i.length>1?(this._doInAllActiveSectors(t,i[0],i[1]),this._doInAllFrozenSectors(t,i[0],i[1])):(this._doInAllActiveSectors(t,e),this._doInAllFrozenSectors(t,e))},e._clearNodeIndexList=function(){var 
t=this._sector();this.sectors.active[t].nodeIndices=[],this.nodeIndices=this.sectors.active[t].nodeIndices},e._drawSectorNodes=function(t,e){var i,s=1e9,o=-1e9,n=1e9,r=-1e9;for(var a in this.sectors[e])if(this.sectors[e].hasOwnProperty(a)&&void 0!==this.sectors[e][a].drawingNode){this._switchToSector(a,e),s=1e9,o=-1e9,n=1e9,r=-1e9;for(var h in this.nodes)this.nodes.hasOwnProperty(h)&&(i=this.nodes[h],i.resize(t),n>i.x-.5*i.width&&(n=i.x-.5*i.width),r<i.x+.5*i.width&&(r=i.x+.5*i.width),s>i.y-.5*i.height&&(s=i.y-.5*i.height),o<i.y+.5*i.height&&(o=i.y+.5*i.height));i=this.sectors[e][a].drawingNode,i.x=.5*(r+n),i.y=.5*(o+s),i.width=2*(i.x-n),i.height=2*(i.y-s),i.options.radius=Math.sqrt(Math.pow(.5*i.width,2)+Math.pow(.5*i.height,2)),i.setScale(this.scale),i._drawCircle(t)}},e._drawAllSectorNodes=function(t){this._drawSectorNodes(t,"frozen"),this._drawSectorNodes(t,"active"),this._loadLatestSector()}},function(t,e,i){var s=i(40);e._getNodesOverlappingWith=function(t,e){var i=this.nodes;for(var s in i)i.hasOwnProperty(s)&&i[s].isOverlappingWith(t)&&e.push(s)},e._getAllNodesOverlappingWith=function(t){var e=[];return this._doInAllActiveSectors("_getNodesOverlappingWith",t,e),e},e._pointerToPositionObject=function(t){var e=this._XconvertDOMtoCanvas(t.x),i=this._YconvertDOMtoCanvas(t.y);return{left:e,top:i,right:e,bottom:i}},e._getNodeAt=function(t){var e=this._pointerToPositionObject(t),i=this._getAllNodesOverlappingWith(e);return i.length>0?this.nodes[i[i.length-1]]:null},e._getEdgesOverlappingWith=function(t,e){var i=this.edges;for(var s in i)i.hasOwnProperty(s)&&i[s].isOverlappingWith(t)&&e.push(s)},e._getAllEdgesOverlappingWith=function(t){var e=[];return this._doInAllActiveSectors("_getEdgesOverlappingWith",t,e),e},e._getEdgeAt=function(t){var e=this._pointerToPositionObject(t),i=this._getAllEdgesOverlappingWith(e);return i.length>0?this.edges[i[i.length-1]]:null},e._addToSelection=function(t){t instanceof s?this.selectionObj.nodes[t.id]=t:this.selectionObj.edges[t.id]=t},e._addToHover=function(t){t instanceof s?this.hoverObj.nodes[t.id]=t:this.hoverObj.edges[t.id]=t},e._removeFromSelection=function(t){t instanceof s?delete this.selectionObj.nodes[t.id]:delete this.selectionObj.edges[t.id]},e._unselectAll=function(t){void 0===t&&(t=!1);for(var e in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(e)&&this.selectionObj.nodes[e].unselect();for(var i in this.selectionObj.edges)this.selectionObj.edges.hasOwnProperty(i)&&this.selectionObj.edges[i].unselect();this.selectionObj={nodes:{},edges:{}},0==t&&this.emit("select",this.getSelection())},e._unselectClusters=function(t){void 0===t&&(t=!1);for(var e in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(e)&&this.selectionObj.nodes[e].clusterSize>1&&(this.selectionObj.nodes[e].unselect(),this._removeFromSelection(this.selectionObj.nodes[e]));0==t&&this.emit("select",this.getSelection())},e._getSelectedNodeCount=function(){var t=0;for(var e in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(e)&&(t+=1);return t},e._getSelectedNode=function(){for(var t in this.selectionObj.nodes)if(this.selectionObj.nodes.hasOwnProperty(t))return this.selectionObj.nodes[t];return null},e._getSelectedEdge=function(){for(var t in this.selectionObj.edges)if(this.selectionObj.edges.hasOwnProperty(t))return this.selectionObj.edges[t];return null},e._getSelectedEdgeCount=function(){var t=0;for(var e in this.selectionObj.edges)this.selectionObj.edges.hasOwnProperty(e)&&(t+=1);return t},e._getSelectedObjectCount=function(){var 
t=0;for(var e in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(e)&&(t+=1);for(var i in this.selectionObj.edges)this.selectionObj.edges.hasOwnProperty(i)&&(t+=1);return t},e._selectionIsEmpty=function(){for(var t in this.selectionObj.nodes)if(this.selectionObj.nodes.hasOwnProperty(t))return!1;for(var e in this.selectionObj.edges)if(this.selectionObj.edges.hasOwnProperty(e))return!1;return!0},e._clusterInSelection=function(){for(var t in this.selectionObj.nodes)if(this.selectionObj.nodes.hasOwnProperty(t)&&this.selectionObj.nodes[t].clusterSize>1)return!0;return!1},e._selectConnectedEdges=function(t){for(var e=0;e<t.dynamicEdges.length;e++){var i=t.dynamicEdges[e];i.select(),this._addToSelection(i)}},e._hoverConnectedEdges=function(t){for(var e=0;e<t.dynamicEdges.length;e++){var i=t.dynamicEdges[e];i.hover=!0,this._addToHover(i)}},e._unselectConnectedEdges=function(t){for(var e=0;e<t.dynamicEdges.length;e++){var i=t.dynamicEdges[e];i.unselect(),this._removeFromSelection(i)}},e._selectObject=function(t,e,i,o,n){void 0===i&&(i=!1),void 0===o&&(o=!0),0==this._selectionIsEmpty()&&0==e&&0==this.forceAppendSelection&&this._unselectAll(!0),0!=t.selected||1!=this.constants.selectable&&!n?0==t.selected?(this._addToSelection(t),i=!0):(t.unselect(),this._removeFromSelection(t)):(t.select(),this._addToSelection(t),t instanceof s&&0==this.blockConnectingEdgeSelection&&1==o&&this._selectConnectedEdges(t)),0==i&&this.emit("select",this.getSelection())},e._blurObject=function(t){1==t.hover&&(t.hover=!1,this.emit("blurNode",{node:t.id}))},e._hoverObject=function(t){0==t.hover&&(t.hover=!0,this._addToHover(t),t instanceof s&&this.emit("hoverNode",{node:t.id})),t instanceof s&&this._hoverConnectedEdges(t)},e._handleTouch=function(){},e._handleTap=function(t){var e=this._getNodeAt(t);if(null!=e)this._selectObject(e,!1);else{var i=this._getEdgeAt(t);null!=i?this._selectObject(i,!1):this._unselectAll()}var s=this.getSelection();s.pointer={DOM:{x:t.x,y:t.y},canvas:{x:this._XconvertDOMtoCanvas(t.x),y:this._YconvertDOMtoCanvas(t.y)}},this.emit("click",s),this._redraw()},e._handleDoubleTap=function(t){var e=this._getNodeAt(t);null!=e&&void 0!==e&&(this.areaCenter={x:this._XconvertDOMtoCanvas(t.x),y:this._YconvertDOMtoCanvas(t.y)},this.openCluster(e));var i=this.getSelection();i.pointer={DOM:{x:t.x,y:t.y},canvas:{x:this._XconvertDOMtoCanvas(t.x),y:this._YconvertDOMtoCanvas(t.y)}},this.emit("doubleClick",i)},e._handleOnHold=function(t){var e=this._getNodeAt(t);if(null!=e)this._selectObject(e,!0);else{var i=this._getEdgeAt(t);null!=i&&this._selectObject(i,!0)}this._redraw()},e._handleOnRelease=function(t){this._manipulationReleaseOverload(t),this._navigationReleaseOverload(t)},e._manipulationReleaseOverload=function(){},e._navigationReleaseOverload=function(){},e.getSelection=function(){var t=this.getSelectedNodes(),e=this.getSelectedEdges();return{nodes:t,edges:e}},e.getSelectedNodes=function(){var t=[];if(1==this.constants.selectable)for(var e in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(e)&&t.push(e);return t},e.getSelectedEdges=function(){var t=[];if(1==this.constants.selectable)for(var e in this.selectionObj.edges)this.selectionObj.edges.hasOwnProperty(e)&&t.push(e);return t},e.setSelection=function(){console.log("setSelection is deprecated. 
Please use selectNodes instead.")},e.selectNodes=function(t,e){var i,s,o;if(!t||void 0==t.length)throw"Selection must be an array with ids";for(this._unselectAll(!0),i=0,s=t.length;s>i;i++){o=t[i];var n=this.nodes[o];if(!n)throw new RangeError('Node with id "'+o+'" not found');this._selectObject(n,!0,!0,e,!0)}this.redraw()},e.selectEdges=function(t){var e,i,s;if(!t||void 0==t.length)throw"Selection must be an array with ids";for(this._unselectAll(!0),e=0,i=t.length;i>e;e++){s=t[e];var o=this.edges[s];if(!o)throw new RangeError('Edge with id "'+s+'" not found');this._selectObject(o,!0,!0,!1,!0)}this.redraw()},e._updateSelection=function(){for(var t in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(t)&&(this.nodes.hasOwnProperty(t)||delete this.selectionObj.nodes[t]);for(var e in this.selectionObj.edges)this.selectionObj.edges.hasOwnProperty(e)&&(this.edges.hasOwnProperty(e)||delete this.selectionObj.edges[e])}},function(t,e,i){var s=i(1),o=i(40),n=i(37);e._clearManipulatorBar=function(){this._recursiveDOMDelete(this.manipulationDiv),this.manipulationDOM={},this._manipulationReleaseOverload=function(){},delete this.sectors.support.nodes.targetNode,delete this.sectors.support.nodes.targetViaNode,this.controlNodesActive=!1,this.freezeSimulation=!1},e._restoreOverloadedFunctions=function(){for(var t in this.cachedFunctions)this.cachedFunctions.hasOwnProperty(t)&&(this[t]=this.cachedFunctions[t],delete this.cachedFunctions[t])},e._toggleEditMode=function(){this.editMode=!this.editMode;var t=this.manipulationDiv,e=this.closeDiv,i=this.editModeDiv;1==this.editMode?(t.style.display="block",e.style.display="block",i.style.display="none",e.onclick=this._toggleEditMode.bind(this)):(t.style.display="none",e.style.display="none",i.style.display="block",e.onclick=null),this._createManipulatorBar()},e._createManipulatorBar=function(){this.boundFunction&&this.off("select",this.boundFunction);var t=this.constants.locales[this.constants.locale];if(void 0!==this.edgeBeingEdited&&(this.edgeBeingEdited._disableControlNodes(),this.edgeBeingEdited=void 0,this.selectedControlNode=null,this.controlNodesActive=!1,this._redraw()),this._restoreOverloadedFunctions(),this.freezeSimulation=!1,this.blockConnectingEdgeSelection=!1,this.forceAppendSelection=!1,this.manipulationDOM={},1==this.editMode){for(;this.manipulationDiv.hasChildNodes();)this.manipulationDiv.removeChild(this.manipulationDiv.firstChild);this.manipulationDOM.addNodeSpan=document.createElement("span"),this.manipulationDOM.addNodeSpan.className="network-manipulationUI add",this.manipulationDOM.addNodeLabelSpan=document.createElement("span"),this.manipulationDOM.addNodeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.addNodeLabelSpan.innerHTML=t.addNode,this.manipulationDOM.addNodeSpan.appendChild(this.manipulationDOM.addNodeLabelSpan),this.manipulationDOM.seperatorLineDiv1=document.createElement("div"),this.manipulationDOM.seperatorLineDiv1.className="network-seperatorLine",this.manipulationDOM.addEdgeSpan=document.createElement("span"),this.manipulationDOM.addEdgeSpan.className="network-manipulationUI 
connect",this.manipulationDOM.addEdgeLabelSpan=document.createElement("span"),this.manipulationDOM.addEdgeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.addEdgeLabelSpan.innerHTML=t.addEdge,this.manipulationDOM.addEdgeSpan.appendChild(this.manipulationDOM.addEdgeLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.addNodeSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv1),this.manipulationDiv.appendChild(this.manipulationDOM.addEdgeSpan),1==this._getSelectedNodeCount()&&this.triggerFunctions.edit?(this.manipulationDOM.seperatorLineDiv2=document.createElement("div"),this.manipulationDOM.seperatorLineDiv2.className="network-seperatorLine",this.manipulationDOM.editNodeSpan=document.createElement("span"),this.manipulationDOM.editNodeSpan.className="network-manipulationUI edit",this.manipulationDOM.editNodeLabelSpan=document.createElement("span"),this.manipulationDOM.editNodeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.editNodeLabelSpan.innerHTML=t.editNode,this.manipulationDOM.editNodeSpan.appendChild(this.manipulationDOM.editNodeLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv2),this.manipulationDiv.appendChild(this.manipulationDOM.editNodeSpan)):1==this._getSelectedEdgeCount()&&0==this._getSelectedNodeCount()&&(this.manipulationDOM.seperatorLineDiv3=document.createElement("div"),this.manipulationDOM.seperatorLineDiv3.className="network-seperatorLine",this.manipulationDOM.editEdgeSpan=document.createElement("span"),this.manipulationDOM.editEdgeSpan.className="network-manipulationUI edit",this.manipulationDOM.editEdgeLabelSpan=document.createElement("span"),this.manipulationDOM.editEdgeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.editEdgeLabelSpan.innerHTML=t.editEdge,this.manipulationDOM.editEdgeSpan.appendChild(this.manipulationDOM.editEdgeLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv3),this.manipulationDiv.appendChild(this.manipulationDOM.editEdgeSpan)),0==this._selectionIsEmpty()&&(this.manipulationDOM.seperatorLineDiv4=document.createElement("div"),this.manipulationDOM.seperatorLineDiv4.className="network-seperatorLine",this.manipulationDOM.deleteSpan=document.createElement("span"),this.manipulationDOM.deleteSpan.className="network-manipulationUI delete",this.manipulationDOM.deleteLabelSpan=document.createElement("span"),this.manipulationDOM.deleteLabelSpan.className="network-manipulationLabel",this.manipulationDOM.deleteLabelSpan.innerHTML=t.del,this.manipulationDOM.deleteSpan.appendChild(this.manipulationDOM.deleteLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv4),this.manipulationDiv.appendChild(this.manipulationDOM.deleteSpan)),this.manipulationDOM.addNodeSpan.onclick=this._createAddNodeToolbar.bind(this),this.manipulationDOM.addEdgeSpan.onclick=this._createAddEdgeToolbar.bind(this),1==this._getSelectedNodeCount()&&this.triggerFunctions.edit?this.manipulationDOM.editNodeSpan.onclick=this._editNode.bind(this):1==this._getSelectedEdgeCount()&&0==this._getSelectedNodeCount()&&(this.manipulationDOM.editEdgeSpan.onclick=this._createEditEdgeToolbar.bind(this)),0==this._selectionIsEmpty()&&(this.manipulationDOM.deleteSpan.onclick=this._deleteSelected.bind(this)),this.closeDiv.onclick=this._toggleEditMode.bind(this); var 
e=this;this.boundFunction=e._createManipulatorBar,this.on("select",this.boundFunction)}else{for(;this.editModeDiv.hasChildNodes();)this.editModeDiv.removeChild(this.editModeDiv.firstChild);this.manipulationDOM.editModeSpan=document.createElement("span"),this.manipulationDOM.editModeSpan.className="network-manipulationUI edit editmode",this.manipulationDOM.editModeLabelSpan=document.createElement("span"),this.manipulationDOM.editModeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.editModeLabelSpan.innerHTML=t.edit,this.manipulationDOM.editModeSpan.appendChild(this.manipulationDOM.editModeLabelSpan),this.editModeDiv.appendChild(this.manipulationDOM.editModeSpan),this.manipulationDOM.editModeSpan.onclick=this._toggleEditMode.bind(this)}},e._createAddNodeToolbar=function(){this._clearManipulatorBar(),this.boundFunction&&this.off("select",this.boundFunction);var t=this.constants.locales[this.constants.locale];this.manipulationDOM={},this.manipulationDOM.backSpan=document.createElement("span"),this.manipulationDOM.backSpan.className="network-manipulationUI back",this.manipulationDOM.backLabelSpan=document.createElement("span"),this.manipulationDOM.backLabelSpan.className="network-manipulationLabel",this.manipulationDOM.backLabelSpan.innerHTML=t.back,this.manipulationDOM.backSpan.appendChild(this.manipulationDOM.backLabelSpan),this.manipulationDOM.seperatorLineDiv1=document.createElement("div"),this.manipulationDOM.seperatorLineDiv1.className="network-seperatorLine",this.manipulationDOM.descriptionSpan=document.createElement("span"),this.manipulationDOM.descriptionSpan.className="network-manipulationUI none",this.manipulationDOM.descriptionLabelSpan=document.createElement("span"),this.manipulationDOM.descriptionLabelSpan.className="network-manipulationLabel",this.manipulationDOM.descriptionLabelSpan.innerHTML=t.addDescription,this.manipulationDOM.descriptionSpan.appendChild(this.manipulationDOM.descriptionLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.backSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv1),this.manipulationDiv.appendChild(this.manipulationDOM.descriptionSpan),this.manipulationDOM.backSpan.onclick=this._createManipulatorBar.bind(this);var e=this;this.boundFunction=e._addNode,this.on("select",this.boundFunction)},e._createAddEdgeToolbar=function(){this._clearManipulatorBar(),this._unselectAll(!0),this.freezeSimulation=!0,this.boundFunction&&this.off("select",this.boundFunction);var t=this.constants.locales[this.constants.locale];this._unselectAll(),this.forceAppendSelection=!1,this.blockConnectingEdgeSelection=!0,this.manipulationDOM={},this.manipulationDOM.backSpan=document.createElement("span"),this.manipulationDOM.backSpan.className="network-manipulationUI back",this.manipulationDOM.backLabelSpan=document.createElement("span"),this.manipulationDOM.backLabelSpan.className="network-manipulationLabel",this.manipulationDOM.backLabelSpan.innerHTML=t.back,this.manipulationDOM.backSpan.appendChild(this.manipulationDOM.backLabelSpan),this.manipulationDOM.seperatorLineDiv1=document.createElement("div"),this.manipulationDOM.seperatorLineDiv1.className="network-seperatorLine",this.manipulationDOM.descriptionSpan=document.createElement("span"),this.manipulationDOM.descriptionSpan.className="network-manipulationUI 
none",this.manipulationDOM.descriptionLabelSpan=document.createElement("span"),this.manipulationDOM.descriptionLabelSpan.className="network-manipulationLabel",this.manipulationDOM.descriptionLabelSpan.innerHTML=t.edgeDescription,this.manipulationDOM.descriptionSpan.appendChild(this.manipulationDOM.descriptionLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.backSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv1),this.manipulationDiv.appendChild(this.manipulationDOM.descriptionSpan),this.manipulationDOM.backSpan.onclick=this._createManipulatorBar.bind(this);var e=this;this.boundFunction=e._handleConnect,this.on("select",this.boundFunction),this.cachedFunctions._handleTouch=this._handleTouch,this.cachedFunctions._manipulationReleaseOverload=this._manipulationReleaseOverload,this.cachedFunctions._handleDragStart=this._handleDragStart,this.cachedFunctions._handleDragEnd=this._handleDragEnd,this._handleTouch=this._handleConnect,this._manipulationReleaseOverload=function(){},this._handleDragStart=function(){},this._handleDragEnd=this._finishConnect,this._redraw()},e._createEditEdgeToolbar=function(){this._clearManipulatorBar(),this.controlNodesActive=!0,this.boundFunction&&this.off("select",this.boundFunction),this.edgeBeingEdited=this._getSelectedEdge(),this.edgeBeingEdited._enableControlNodes();var t=this.constants.locales[this.constants.locale];this.manipulationDOM={},this.manipulationDOM.backSpan=document.createElement("span"),this.manipulationDOM.backSpan.className="network-manipulationUI back",this.manipulationDOM.backLabelSpan=document.createElement("span"),this.manipulationDOM.backLabelSpan.className="network-manipulationLabel",this.manipulationDOM.backLabelSpan.innerHTML=t.back,this.manipulationDOM.backSpan.appendChild(this.manipulationDOM.backLabelSpan),this.manipulationDOM.seperatorLineDiv1=document.createElement("div"),this.manipulationDOM.seperatorLineDiv1.className="network-seperatorLine",this.manipulationDOM.descriptionSpan=document.createElement("span"),this.manipulationDOM.descriptionSpan.className="network-manipulationUI 
none",this.manipulationDOM.descriptionLabelSpan=document.createElement("span"),this.manipulationDOM.descriptionLabelSpan.className="network-manipulationLabel",this.manipulationDOM.descriptionLabelSpan.innerHTML=t.editEdgeDescription,this.manipulationDOM.descriptionSpan.appendChild(this.manipulationDOM.descriptionLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.backSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv1),this.manipulationDiv.appendChild(this.manipulationDOM.descriptionSpan),this.manipulationDOM.backSpan.onclick=this._createManipulatorBar.bind(this),this.cachedFunctions._handleTouch=this._handleTouch,this.cachedFunctions._manipulationReleaseOverload=this._manipulationReleaseOverload,this.cachedFunctions._handleTap=this._handleTap,this.cachedFunctions._handleDragStart=this._handleDragStart,this.cachedFunctions._handleOnDrag=this._handleOnDrag,this._handleTouch=this._selectControlNode,this._handleTap=function(){},this._handleOnDrag=this._controlNodeDrag,this._handleDragStart=function(){},this._manipulationReleaseOverload=this._releaseControlNode,this._redraw()},e._selectControlNode=function(t){this.edgeBeingEdited.controlNodes.from.unselect(),this.edgeBeingEdited.controlNodes.to.unselect(),this.selectedControlNode=this.edgeBeingEdited._getSelectedControlNode(this._XconvertDOMtoCanvas(t.x),this._YconvertDOMtoCanvas(t.y)),null!==this.selectedControlNode&&(this.selectedControlNode.select(),this.freezeSimulation=!0),this._redraw()},e._controlNodeDrag=function(t){var e=this._getPointer(t.center);null!==this.selectedControlNode&&void 0!==this.selectedControlNode&&(this.selectedControlNode.x=this._XconvertDOMtoCanvas(e.x),this.selectedControlNode.y=this._YconvertDOMtoCanvas(e.y)),this._redraw()},e._releaseControlNode=function(t){var e=this._getNodeAt(t);null!==e?(1==this.edgeBeingEdited.controlNodes.from.selected&&(this.edgeBeingEdited._restoreControlNodes(),this._editEdge(e.id,this.edgeBeingEdited.to.id),this.edgeBeingEdited.controlNodes.from.unselect()),1==this.edgeBeingEdited.controlNodes.to.selected&&(this.edgeBeingEdited._restoreControlNodes(),this._editEdge(this.edgeBeingEdited.from.id,e.id),this.edgeBeingEdited.controlNodes.to.unselect())):this.edgeBeingEdited._restoreControlNodes(),this.freezeSimulation=!1,this._redraw()},e._handleConnect=function(t){if(0==this._getSelectedNodeCount()){var e=this._getNodeAt(t);if(null!=e)if(e.clusterSize>1)alert(this.constants.locales[this.constants.locale].createEdgeError);else{this._selectObject(e,!1);var i=this.sectors.support.nodes;i.targetNode=new o({id:"targetNode"},{},{},this.constants);var s=i.targetNode;s.x=e.x,s.y=e.y,this.edges.connectionEdge=new n({id:"connectionEdge",from:e.id,to:s.id},this,this.constants);var r=this.edges.connectionEdge;r.from=e,r.connected=!0,r.options.smoothCurves={enabled:!0,dynamic:!1,type:"continuous",roundness:.5},r.selected=!0,r.to=s,this.cachedFunctions._handleOnDrag=this._handleOnDrag,this._handleOnDrag=function(t){var e=this._getPointer(t.center),i=this.edges.connectionEdge;i.to.x=this._XconvertDOMtoCanvas(e.x),i.to.y=this._YconvertDOMtoCanvas(e.y)},this.moving=!0,this.start()}}},e._finishConnect=function(t){if(1==this._getSelectedNodeCount()){var e=this._getPointer(t.center);this._handleOnDrag=this.cachedFunctions._handleOnDrag,delete this.cachedFunctions._handleOnDrag;var i=this.edges.connectionEdge.fromId;delete this.edges.connectionEdge,delete this.sectors.support.nodes.targetNode,delete this.sectors.support.nodes.targetViaNode;var 
s=this._getNodeAt(e);null!=s&&(s.clusterSize>1?alert(this.constants.locales[this.constants.locale].createEdgeError):(this._createEdge(i,s.id),this._createManipulatorBar())),this._unselectAll()}},e._addNode=function(){if(this._selectionIsEmpty()&&1==this.editMode){var t=this._pointerToPositionObject(this.pointerPosition),e={id:s.randomUUID(),x:t.left,y:t.top,label:"new",allowedToMoveX:!0,allowedToMoveY:!0};if(this.triggerFunctions.add){if(2!=this.triggerFunctions.add.length)throw new Error("The function for add does not support two arguments (data,callback)");var i=this;this.triggerFunctions.add(e,function(t){i.nodesData.add(t),i._createManipulatorBar(),i.moving=!0,i.start()})}else this.nodesData.add(e),this._createManipulatorBar(),this.moving=!0,this.start()}},e._createEdge=function(t,e){if(1==this.editMode){var i={from:t,to:e};if(this.triggerFunctions.connect){if(2!=this.triggerFunctions.connect.length)throw new Error("The function for connect does not support two arguments (data,callback)");var s=this;this.triggerFunctions.connect(i,function(t){s.edgesData.add(t),s.moving=!0,s.start()})}else this.edgesData.add(i),this.moving=!0,this.start()}},e._editEdge=function(t,e){if(1==this.editMode){var i={id:this.edgeBeingEdited.id,from:t,to:e};if(this.triggerFunctions.editEdge){if(2!=this.triggerFunctions.editEdge.length)throw new Error("The function for edit does not support two arguments (data, callback)");var s=this;this.triggerFunctions.editEdge(i,function(t){s.edgesData.update(t),s.moving=!0,s.start()})}else this.edgesData.update(i),this.moving=!0,this.start()}},e._editNode=function(){if(!this.triggerFunctions.edit||1!=this.editMode)throw new Error("No edit function has been bound to this button");var t=this._getSelectedNode(),e={id:t.id,label:t.label,group:t.options.group,shape:t.options.shape,color:{background:t.options.color.background,border:t.options.color.border,highlight:{background:t.options.color.highlight.background,border:t.options.color.highlight.border}}};if(2!=this.triggerFunctions.edit.length)throw new Error("The function for edit does not support two arguments (data, callback)");var i=this;this.triggerFunctions.edit(e,function(t){i.nodesData.update(t),i._createManipulatorBar(),i.moving=!0,i.start()})},e._deleteSelected=function(){if(!this._selectionIsEmpty()&&1==this.editMode)if(this._clusterInSelection())alert(this.constants.locales[this.constants.locale].deleteClusterError);else{var t=this.getSelectedNodes(),e=this.getSelectedEdges();if(this.triggerFunctions.del){var i=this,s={nodes:t,edges:e};if(2!=this.triggerFunctions.del.length)throw new Error("The function for delete does not support two arguments (data, callback)");this.triggerFunctions.del(s,function(t){i.edgesData.remove(t.edges),i.nodesData.remove(t.nodes),i._unselectAll(),i.moving=!0,i.start()})}else this.edgesData.remove(e),this.nodesData.remove(t),this._unselectAll(),this.moving=!0,this.start()}}},function(t,e,i){var s=(i(1),i(47)),o=i(45);e._cleanNavigation=function(){if(0!=this.navigationHammers.existing.length){for(var t=0;t<this.navigationHammers.existing.length;t++)this.navigationHammers.existing[t].destroy();this.navigationHammers.existing=[]}this._navigationReleaseOverload=function(){},this.navigationDivs&&this.navigationDivs.wrapper&&this.navigationDivs.wrapper.parentNode&&this.navigationDivs.wrapper.parentNode.removeChild(this.navigationDivs.wrapper)},e._loadNavigationElements=function(){this._cleanNavigation(),this.navigationDivs={};var 
t=["up","down","left","right","zoomIn","zoomOut","zoomExtends"],e=["_moveUp","_moveDown","_moveLeft","_moveRight","_zoomIn","_zoomOut","_zoomExtent"];this.navigationDivs.wrapper=document.createElement("div"),this.frame.appendChild(this.navigationDivs.wrapper);for(var i=0;i<t.length;i++){this.navigationDivs[t[i]]=document.createElement("div"),this.navigationDivs[t[i]].className="network-navigation "+t[i],this.navigationDivs.wrapper.appendChild(this.navigationDivs[t[i]]);var n=new o(this.navigationDivs[t[i]],{prevent_default:!0});s.onTouch(n,this[e[i]].bind(this)),s.onRelease(n,this._onRelease.bind(this)),this.navigationHammers._new.push(n)}this._navigationReleaseOverload=this._stopMovement,this.navigationHammers.existing=this.navigationHammers._new},e._zoomExtent=function(t){this.zoomExtent({duration:700}),t.stopPropagation()},e._stopMovement=function(){this._xStopMoving(),this._yStopMoving(),this._stopZoom()},e._moveUp=function(t){this.yIncrement=this.constants.keyboard.speed.y,this.start(),t.preventDefault()},e._moveDown=function(t){this.yIncrement=-this.constants.keyboard.speed.y,this.start(),t.preventDefault()},e._moveLeft=function(t){this.xIncrement=this.constants.keyboard.speed.x,this.start(),t.preventDefault()},e._moveRight=function(t){this.xIncrement=-this.constants.keyboard.speed.y,this.start(),t.preventDefault()},e._zoomIn=function(t){this.zoomIncrement=this.constants.keyboard.speed.zoom,this.start(),t.preventDefault()},e._zoomOut=function(t){this.zoomIncrement=-this.constants.keyboard.speed.zoom,this.start(),t.preventDefault()},e._stopZoom=function(t){this.zoomIncrement=0,t&&t.preventDefault()},e._yStopMoving=function(t){this.yIncrement=0,t&&t.preventDefault()},e._xStopMoving=function(t){this.xIncrement=0,t&&t.preventDefault()}},function(t,e){e._resetLevels=function(){for(var t in this.nodes)if(this.nodes.hasOwnProperty(t)){var e=this.nodes[t];0==e.preassignedLevel&&(e.level=-1,e.hierarchyEnumerated=!1)}},e._setupHierarchicalLayout=function(){if(1==this.constants.hierarchicalLayout.enabled&&this.nodeIndices.length>0){var t,e,i=0,s=!1,o=!1;for(e in this.nodes)this.nodes.hasOwnProperty(e)&&(t=this.nodes[e],-1!=t.level?s=!0:o=!0,i<t.edges.length&&(i=t.edges.length));if(1==o&&1==s)throw new Error("To use the hierarchical layout, nodes require either no predefined levels or levels have to be defined for all nodes.");this._changeConstants(),1==o&&("hubsize"==this.constants.hierarchicalLayout.layout?this._determineLevels(i):this._determineLevelsDirected(!1));var n=this._getDistribution();this._placeNodesByHierarchy(n),this.start()}},e._placeNodesByHierarchy=function(t){var e,i;for(var s in t)if(t.hasOwnProperty(s))for(e in t[s].nodes)t[s].nodes.hasOwnProperty(e)&&(i=t[s].nodes[e],"UD"==this.constants.hierarchicalLayout.direction||"DU"==this.constants.hierarchicalLayout.direction?i.xFixed&&(i.x=t[s].minPos,i.xFixed=!1,t[s].minPos+=t[s].nodeSpacing):i.yFixed&&(i.y=t[s].minPos,i.yFixed=!1,t[s].minPos+=t[s].nodeSpacing),this._placeBranchNodes(i.edges,i.id,t,i.level));this._stabilize()},e._getDistribution=function(){var t,e,i,s={};for(t in this.nodes)this.nodes.hasOwnProperty(t)&&(e=this.nodes[t],e.xFixed=!0,e.yFixed=!0,"UD"==this.constants.hierarchicalLayout.direction||"DU"==this.constants.hierarchicalLayout.direction?e.y=this.constants.hierarchicalLayout.levelSeparation*e.level:e.x=this.constants.hierarchicalLayout.levelSeparation*e.level,void 0===s[e.level]&&(s[e.level]={amount:0,nodes:{},minPos:0,nodeSpacing:0}),s[e.level].amount+=1,s[e.level].nodes[t]=e);var o=0;for(i in 
s)s.hasOwnProperty(i)&&o<s[i].amount&&(o=s[i].amount);for(i in s)s.hasOwnProperty(i)&&(s[i].nodeSpacing=(o+1)*this.constants.hierarchicalLayout.nodeSpacing,s[i].nodeSpacing/=s[i].amount+1,s[i].minPos=s[i].nodeSpacing-.5*(s[i].amount+1)*s[i].nodeSpacing);return s},e._determineLevels=function(t){var e,i;for(e in this.nodes)this.nodes.hasOwnProperty(e)&&(i=this.nodes[e],i.edges.length==t&&(i.level=0));for(e in this.nodes)this.nodes.hasOwnProperty(e)&&(i=this.nodes[e],0==i.level&&this._setLevel(1,i.edges,i.id))},e._determineLevelsDirected=function(){var t,e,i,s=1e4;i=this.nodes[this.nodeIndices[0]],i.level=s,this._setLevelDirected(s,i.edges,i.id);for(t in this.nodes)this.nodes.hasOwnProperty(t)&&(e=this.nodes[t],s=e.level<s?e.level:s);for(t in this.nodes)this.nodes.hasOwnProperty(t)&&(e=this.nodes[t],e.level-=s)},e._changeConstants=function(){this.constants.clustering.enabled=!1,this.constants.physics.barnesHut.enabled=!1,this.constants.physics.hierarchicalRepulsion.enabled=!0,this._loadSelectedForceSolver(),1==this.constants.smoothCurves.enabled&&(this.constants.smoothCurves.dynamic=!1),this._configureSmoothCurves();var t=this.constants.hierarchicalLayout;t.levelSeparation=Math.abs(t.levelSeparation),("RL"==t.direction||"DU"==t.direction)&&(t.levelSeparation*=-1),"RL"==t.direction||"LR"==t.direction?1==this.constants.smoothCurves.enabled&&(this.constants.smoothCurves.type="vertical"):1==this.constants.smoothCurves.enabled&&(this.constants.smoothCurves.type="horizontal")},e._placeBranchNodes=function(t,e,i,s){for(var o=0;o<t.length;o++){var n=null;n=t[o].toId==e?t[o].from:t[o].to;var r=!1;"UD"==this.constants.hierarchicalLayout.direction||"DU"==this.constants.hierarchicalLayout.direction?n.xFixed&&n.level>s&&(n.xFixed=!1,n.x=i[n.level].minPos,r=!0):n.yFixed&&n.level>s&&(n.yFixed=!1,n.y=i[n.level].minPos,r=!0),1==r&&(i[n.level].minPos+=i[n.level].nodeSpacing,n.edges.length>1&&this._placeBranchNodes(n.edges,n.id,i,n.level))}},e._setLevel=function(t,e,i){for(var s=0;s<e.length;s++){var o=null;o=e[s].toId==i?e[s].from:e[s].to,(-1==o.level||o.level>t)&&(o.level=t,o.edges.length>1&&this._setLevel(t+1,o.edges,o.id))}},e._setLevelDirected=function(t,e,i){this.nodes[i].hierarchyEnumerated=!0;for(var s,o,n=0;n<e.length;n++)o=1,e[n].toId==i?(s=e[n].from,o=-1):s=e[n].to,-1==s.level&&(s.level=t+o);for(var n=0;n<e.length;n++)s=e[n].toId==i?e[n].from:e[n].to,s.edges.length>1&&s.hierarchyEnumerated===!1&&this._setLevelDirected(s.level,s.edges,s.id)},e._restoreNodes=function(){for(var t in this.nodes)this.nodes.hasOwnProperty(t)&&(this.nodes[t].xFixed=!1,this.nodes[t].yFixed=!1)}},function(t,e,i){function s(){this.constants.smoothCurves.enabled=!this.constants.smoothCurves.enabled;var t=document.getElementById("graph_toggleSmooth");t.style.background=1==this.constants.smoothCurves.enabled?"#A4FF56":"#FF8532",this._configureSmoothCurves(!1)}function o(){for(var t in this.calculationNodes)this.calculationNodes.hasOwnProperty(t)&&(this.calculationNodes[t].vx=0,this.calculationNodes[t].vy=0,this.calculationNodes[t].fx=0,this.calculationNodes[t].fy=0);1==this.constants.hierarchicalLayout.enabled?(this._setupHierarchicalLayout(),a.call(this,"graph_H_nd",1,"physics_hierarchicalRepulsion_nodeDistance"),a.call(this,"graph_H_cg",1,"physics_centralGravity"),a.call(this,"graph_H_sc",1,"physics_springConstant"),a.call(this,"graph_H_sl",1,"physics_springLength"),a.call(this,"graph_H_damp",1,"physics_damping")):this.repositionNodes(),this.moving=!0,this.start()}function n(){var t="No options are required, default values 
used.",e=[],i=document.getElementById("graph_physicsMethod1"),s=document.getElementById("graph_physicsMethod2");if(1==i.checked){if(this.constants.physics.barnesHut.gravitationalConstant!=this.backupConstants.physics.barnesHut.gravitationalConstant&&e.push("gravitationalConstant: "+this.constants.physics.barnesHut.gravitationalConstant),this.constants.physics.centralGravity!=this.backupConstants.physics.barnesHut.centralGravity&&e.push("centralGravity: "+this.constants.physics.centralGravity),this.constants.physics.springLength!=this.backupConstants.physics.barnesHut.springLength&&e.push("springLength: "+this.constants.physics.springLength),this.constants.physics.springConstant!=this.backupConstants.physics.barnesHut.springConstant&&e.push("springConstant: "+this.constants.physics.springConstant),this.constants.physics.damping!=this.backupConstants.physics.barnesHut.damping&&e.push("damping: "+this.constants.physics.damping),0!=e.length){t="var options = {",t+="physics: {barnesHut: {";for(var o=0;o<e.length;o++)t+=e[o],o<e.length-1&&(t+=", ");t+="}}"}this.constants.smoothCurves.enabled!=this.backupConstants.smoothCurves.enabled&&(0==e.length?t="var options = {":t+=", ",t+="smoothCurves: "+this.constants.smoothCurves.enabled),"No options are required, default values used."!=t&&(t+="};")}else if(1==s.checked){if(t="var options = {",t+="physics: {barnesHut: {enabled: false}",this.constants.physics.repulsion.nodeDistance!=this.backupConstants.physics.repulsion.nodeDistance&&e.push("nodeDistance: "+this.constants.physics.repulsion.nodeDistance),this.constants.physics.centralGravity!=this.backupConstants.physics.repulsion.centralGravity&&e.push("centralGravity: "+this.constants.physics.centralGravity),this.constants.physics.springLength!=this.backupConstants.physics.repulsion.springLength&&e.push("springLength: "+this.constants.physics.springLength),this.constants.physics.springConstant!=this.backupConstants.physics.repulsion.springConstant&&e.push("springConstant: "+this.constants.physics.springConstant),this.constants.physics.damping!=this.backupConstants.physics.repulsion.damping&&e.push("damping: "+this.constants.physics.damping),0!=e.length){t+=", repulsion: {";for(var o=0;o<e.length;o++)t+=e[o],o<e.length-1&&(t+=", ");t+="}}"}0==e.length&&(t+="}"),this.constants.smoothCurves!=this.backupConstants.smoothCurves&&(t+=", smoothCurves: "+this.constants.smoothCurves),t+="};"}else{if(t="var options = {",this.constants.physics.hierarchicalRepulsion.nodeDistance!=this.backupConstants.physics.hierarchicalRepulsion.nodeDistance&&e.push("nodeDistance: "+this.constants.physics.hierarchicalRepulsion.nodeDistance),this.constants.physics.centralGravity!=this.backupConstants.physics.hierarchicalRepulsion.centralGravity&&e.push("centralGravity: "+this.constants.physics.centralGravity),this.constants.physics.springLength!=this.backupConstants.physics.hierarchicalRepulsion.springLength&&e.push("springLength: "+this.constants.physics.springLength),this.constants.physics.springConstant!=this.backupConstants.physics.hierarchicalRepulsion.springConstant&&e.push("springConstant: "+this.constants.physics.springConstant),this.constants.physics.damping!=this.backupConstants.physics.hierarchicalRepulsion.damping&&e.push("damping: "+this.constants.physics.damping),0!=e.length){t+="physics: {hierarchicalRepulsion: {";for(var o=0;o<e.length;o++)t+=e[o],o<e.length-1&&(t+=", ");t+="}},"}if(t+="hierarchicalLayout: 
{",e=[],this.constants.hierarchicalLayout.direction!=this.backupConstants.hierarchicalLayout.direction&&e.push("direction: "+this.constants.hierarchicalLayout.direction),Math.abs(this.constants.hierarchicalLayout.levelSeparation)!=this.backupConstants.hierarchicalLayout.levelSeparation&&e.push("levelSeparation: "+this.constants.hierarchicalLayout.levelSeparation),this.constants.hierarchicalLayout.nodeSpacing!=this.backupConstants.hierarchicalLayout.nodeSpacing&&e.push("nodeSpacing: "+this.constants.hierarchicalLayout.nodeSpacing),0!=e.length){for(var o=0;o<e.length;o++)t+=e[o],o<e.length-1&&(t+=", ");t+="}"}else t+="enabled:true}";t+="};"}this.optionsDiv.innerHTML=t}function r(){var t=["graph_BH_table","graph_R_table","graph_H_table"],e=document.querySelector('input[name="graph_physicsMethod"]:checked').value,i="graph_"+e+"_table",s=document.getElementById(i);s.style.display="block";for(var o=0;o<t.length;o++)t[o]!=i&&(s=document.getElementById(t[o]),s.style.display="none");this._restoreNodes(),"R"==e?(this.constants.hierarchicalLayout.enabled=!1,this.constants.physics.hierarchicalRepulsion.enabled=!1,this.constants.physics.barnesHut.enabled=!1):"H"==e?0==this.constants.hierarchicalLayout.enabled&&(this.constants.hierarchicalLayout.enabled=!0,this.constants.physics.hierarchicalRepulsion.enabled=!0,this.constants.physics.barnesHut.enabled=!1,this.constants.smoothCurves.enabled=!1,this._setupHierarchicalLayout()):(this.constants.hierarchicalLayout.enabled=!1,this.constants.physics.hierarchicalRepulsion.enabled=!1,this.constants.physics.barnesHut.enabled=!0),this._loadSelectedForceSolver();var n=document.getElementById("graph_toggleSmooth");n.style.background=1==this.constants.smoothCurves.enabled?"#A4FF56":"#FF8532",this.moving=!0,this.start()}function a(t,e,i){var s=t+"_value",o=document.getElementById(t).value;Array.isArray(e)?(document.getElementById(s).value=e[parseInt(o)],this._overWriteGraphConstants(i,e[parseInt(o)])):(document.getElementById(s).value=parseInt(e)*parseFloat(o),this._overWriteGraphConstants(i,parseInt(e)*parseFloat(o))),("hierarchicalLayout_direction"==i||"hierarchicalLayout_levelSeparation"==i||"hierarchicalLayout_nodeSpacing"==i)&&this._setupHierarchicalLayout(),this.moving=!0,this.start()}var h=i(1),d=i(69),l=i(70),c=i(71);e._toggleBarnesHut=function(){this.constants.physics.barnesHut.enabled=!this.constants.physics.barnesHut.enabled,this._loadSelectedForceSolver(),this.moving=!0,this.start()},e._loadSelectedForceSolver=function(){1==this.constants.physics.barnesHut.enabled?(this._clearMixin(d),this._clearMixin(l),this.constants.physics.centralGravity=this.constants.physics.barnesHut.centralGravity,this.constants.physics.springLength=this.constants.physics.barnesHut.springLength,this.constants.physics.springConstant=this.constants.physics.barnesHut.springConstant,this.constants.physics.damping=this.constants.physics.barnesHut.damping,this._loadMixin(c)):1==this.constants.physics.hierarchicalRepulsion.enabled?(this._clearMixin(c),this._clearMixin(d),this.constants.physics.centralGravity=this.constants.physics.hierarchicalRepulsion.centralGravity,this.constants.physics.springLength=this.constants.physics.hierarchicalRepulsion.springLength,this.constants.physics.springConstant=this.constants.physics.hierarchicalRepulsion.springConstant,this.constants.physics.damping=this.constants.physics.hierarchicalRepulsion.damping,this._loadMixin(l)):(this._clearMixin(c),this._clearMixin(l),this.barnesHutTree=void 
0,this.constants.physics.centralGravity=this.constants.physics.repulsion.centralGravity,this.constants.physics.springLength=this.constants.physics.repulsion.springLength,this.constants.physics.springConstant=this.constants.physics.repulsion.springConstant,this.constants.physics.damping=this.constants.physics.repulsion.damping,this._loadMixin(d))},e._initializeForceCalculation=function(){1==this.nodeIndices.length?this.nodes[this.nodeIndices[0]]._setForce(0,0):(this.nodeIndices.length>this.constants.clustering.clusterThreshold&&1==this.constants.clustering.enabled&&this.clusterToFit(this.constants.clustering.reduceToNodes,!1),this._calculateForces())},e._calculateForces=function(){this._calculateGravitationalForces(),this._calculateNodeForces(),this.constants.physics.springConstant>0&&(1==this.constants.smoothCurves.enabled&&1==this.constants.smoothCurves.dynamic?this._calculateSpringForcesWithSupport():1==this.constants.physics.hierarchicalRepulsion.enabled?this._calculateHierarchicalSpringForces():this._calculateSpringForces())},e._updateCalculationNodes=function(){if(1==this.constants.smoothCurves.enabled&&1==this.constants.smoothCurves.dynamic){this.calculationNodes={},this.calculationNodeIndices=[];for(var t in this.nodes)this.nodes.hasOwnProperty(t)&&(this.calculationNodes[t]=this.nodes[t]);var e=this.sectors.support.nodes;for(var i in e)e.hasOwnProperty(i)&&(this.edges.hasOwnProperty(e[i].parentEdgeId)?this.calculationNodes[i]=e[i]:e[i]._setForce(0,0));for(var s in this.calculationNodes)this.calculationNodes.hasOwnProperty(s)&&this.calculationNodeIndices.push(s)}else this.calculationNodes=this.nodes,this.calculationNodeIndices=this.nodeIndices},e._calculateGravitationalForces=function(){var t,e,i,s,o,n=this.calculationNodes,r=this.constants.physics.centralGravity,a=0;for(o=0;o<this.calculationNodeIndices.length;o++)s=n[this.calculationNodeIndices[o]],s.damping=this.constants.physics.damping,"default"==this._sector()&&0!=r?(t=-s.x,e=-s.y,i=Math.sqrt(t*t+e*e),a=0==i?0:r/i,s.fx=t*a,s.fy=e*a):(s.fx=0,s.fy=0)},e._calculateSpringForces=function(){var t,e,i,s,o,n,r,a,h,d=this.edges;for(i in d)d.hasOwnProperty(i)&&(e=d[i],e.connected&&this.nodes.hasOwnProperty(e.toId)&&this.nodes.hasOwnProperty(e.fromId)&&(t=e.physics.springLength,t+=(e.to.clusterSize+e.from.clusterSize-2)*this.constants.clustering.edgeGrowth,s=e.from.x-e.to.x,o=e.from.y-e.to.y,h=Math.sqrt(s*s+o*o),0==h&&(h=.01),a=this.constants.physics.springConstant*(t-h)/h,n=s*a,r=o*a,e.from.fx+=n,e.from.fy+=r,e.to.fx-=n,e.to.fy-=r))},e._calculateSpringForcesWithSupport=function(){var t,e,i,s,o=this.edges;for(i in o)if(o.hasOwnProperty(i)&&(e=o[i],e.connected&&this.nodes.hasOwnProperty(e.toId)&&this.nodes.hasOwnProperty(e.fromId)&&null!=e.via)){var n=e.to,r=e.via,a=e.from;t=e.physics.springLength,s=n.clusterSize+a.clusterSize-2,t+=s*this.constants.clustering.edgeGrowth,this._calculateSpringForce(n,r,.5*t),this._calculateSpringForce(r,a,.5*t)}},e._calculateSpringForce=function(t,e,i){var s,o,n,r,a,h;s=t.x-e.x,o=t.y-e.y,h=Math.sqrt(s*s+o*o),0==h&&(h=.01),a=this.constants.physics.springConstant*(i-h)/h,n=s*a,r=o*a,t.fx+=n,t.fy+=r,e.fx-=n,e.fy-=r},e._cleanupPhysicsConfiguration=function(){if(void 0!==this.physicsConfiguration){for(;this.physicsConfiguration.hasChildNodes();)this.physicsConfiguration.removeChild(this.physicsConfiguration.firstChild);this.physicsConfiguration.parentNode.removeChild(this.physicsConfiguration),this.physicsConfiguration=void 0}},e._loadPhysicsConfiguration=function(){if(void 
0===this.physicsConfiguration){this.backupConstants={},h.deepExtend(this.backupConstants,this.constants);var t=["LR","RL","UD","DU"];this.physicsConfiguration=document.createElement("div"),this.physicsConfiguration.className="PhysicsConfiguration",this.physicsConfiguration.innerHTML='<table><tr><td><b>Simulation Mode:</b></td></tr><tr><td width="120px"><input type="radio" name="graph_physicsMethod" id="graph_physicsMethod1" value="BH" checked="checked">Barnes Hut</td><td width="120px"><input type="radio" name="graph_physicsMethod" id="graph_physicsMethod2" value="R">Repulsion</td><td width="120px"><input type="radio" name="graph_physicsMethod" id="graph_physicsMethod3" value="H">Hierarchical</td></tr></table><table id="graph_BH_table" style="display:none"><tr><td><b>Barnes Hut</b></td></tr><tr><td width="150px">gravitationalConstant</td><td>0</td><td><input type="range" min="0" max="20000" value="'+-1*this.constants.physics.barnesHut.gravitationalConstant+'" step="25" style="width:300px" id="graph_BH_gc"></td><td width="50px">-20000</td><td><input value="'+-1*this.constants.physics.barnesHut.gravitationalConstant+'" id="graph_BH_gc_value" style="width:60px"></td></tr><tr><td width="150px">centralGravity</td><td>0</td><td><input type="range" min="0" max="3" value="'+this.constants.physics.barnesHut.centralGravity+'" step="0.05" style="width:300px" id="graph_BH_cg"></td><td>3</td><td><input value="'+this.constants.physics.barnesHut.centralGravity+'" id="graph_BH_cg_value" style="width:60px"></td></tr><tr><td width="150px">springLength</td><td>0</td><td><input type="range" min="0" max="500" value="'+this.constants.physics.barnesHut.springLength+'" step="1" style="width:300px" id="graph_BH_sl"></td><td>500</td><td><input value="'+this.constants.physics.barnesHut.springLength+'" id="graph_BH_sl_value" style="width:60px"></td></tr><tr><td width="150px">springConstant</td><td>0</td><td><input type="range" min="0" max="0.5" value="'+this.constants.physics.barnesHut.springConstant+'" step="0.001" style="width:300px" id="graph_BH_sc"></td><td>0.5</td><td><input value="'+this.constants.physics.barnesHut.springConstant+'" id="graph_BH_sc_value" style="width:60px"></td></tr><tr><td width="150px">damping</td><td>0</td><td><input type="range" min="0" max="0.3" value="'+this.constants.physics.barnesHut.damping+'" step="0.005" style="width:300px" id="graph_BH_damp"></td><td>0.3</td><td><input value="'+this.constants.physics.barnesHut.damping+'" id="graph_BH_damp_value" style="width:60px"></td></tr></table><table id="graph_R_table" style="display:none"><tr><td><b>Repulsion</b></td></tr><tr><td width="150px">nodeDistance</td><td>0</td><td><input type="range" min="0" max="300" value="'+this.constants.physics.repulsion.nodeDistance+'" step="1" style="width:300px" id="graph_R_nd"></td><td width="50px">300</td><td><input value="'+this.constants.physics.repulsion.nodeDistance+'" id="graph_R_nd_value" style="width:60px"></td></tr><tr><td width="150px">centralGravity</td><td>0</td><td><input type="range" min="0" max="3" value="'+this.constants.physics.repulsion.centralGravity+'" step="0.05" style="width:300px" id="graph_R_cg"></td><td>3</td><td><input value="'+this.constants.physics.repulsion.centralGravity+'" id="graph_R_cg_value" style="width:60px"></td></tr><tr><td width="150px">springLength</td><td>0</td><td><input type="range" min="0" max="500" value="'+this.constants.physics.repulsion.springLength+'" step="1" style="width:300px" id="graph_R_sl"></td><td>500</td><td><input 
value="'+this.constants.physics.repulsion.springLength+'" id="graph_R_sl_value" style="width:60px"></td></tr><tr><td width="150px">springConstant</td><td>0</td><td><input type="range" min="0" max="0.5" value="'+this.constants.physics.repulsion.springConstant+'" step="0.001" style="width:300px" id="graph_R_sc"></td><td>0.5</td><td><input value="'+this.constants.physics.repulsion.springConstant+'" id="graph_R_sc_value" style="width:60px"></td></tr><tr><td width="150px">damping</td><td>0</td><td><input type="range" min="0" max="0.3" value="'+this.constants.physics.repulsion.damping+'" step="0.005" style="width:300px" id="graph_R_damp"></td><td>0.3</td><td><input value="'+this.constants.physics.repulsion.damping+'" id="graph_R_damp_value" style="width:60px"></td></tr></table><table id="graph_H_table" style="display:none"><tr><td width="150"><b>Hierarchical</b></td></tr><tr><td width="150px">nodeDistance</td><td>0</td><td><input type="range" min="0" max="300" value="'+this.constants.physics.hierarchicalRepulsion.nodeDistance+'" step="1" style="width:300px" id="graph_H_nd"></td><td width="50px">300</td><td><input value="'+this.constants.physics.hierarchicalRepulsion.nodeDistance+'" id="graph_H_nd_value" style="width:60px"></td></tr><tr><td width="150px">centralGravity</td><td>0</td><td><input type="range" min="0" max="3" value="'+this.constants.physics.hierarchicalRepulsion.centralGravity+'" step="0.05" style="width:300px" id="graph_H_cg"></td><td>3</td><td><input value="'+this.constants.physics.hierarchicalRepulsion.centralGravity+'" id="graph_H_cg_value" style="width:60px"></td></tr><tr><td width="150px">springLength</td><td>0</td><td><input type="range" min="0" max="500" value="'+this.constants.physics.hierarchicalRepulsion.springLength+'" step="1" style="width:300px" id="graph_H_sl"></td><td>500</td><td><input value="'+this.constants.physics.hierarchicalRepulsion.springLength+'" id="graph_H_sl_value" style="width:60px"></td></tr><tr><td width="150px">springConstant</td><td>0</td><td><input type="range" min="0" max="0.5" value="'+this.constants.physics.hierarchicalRepulsion.springConstant+'" step="0.001" style="width:300px" id="graph_H_sc"></td><td>0.5</td><td><input value="'+this.constants.physics.hierarchicalRepulsion.springConstant+'" id="graph_H_sc_value" style="width:60px"></td></tr><tr><td width="150px">damping</td><td>0</td><td><input type="range" min="0" max="0.3" value="'+this.constants.physics.hierarchicalRepulsion.damping+'" step="0.005" style="width:300px" id="graph_H_damp"></td><td>0.3</td><td><input value="'+this.constants.physics.hierarchicalRepulsion.damping+'" id="graph_H_damp_value" style="width:60px"></td></tr><tr><td width="150px">direction</td><td>1</td><td><input type="range" min="0" max="3" value="'+t.indexOf(this.constants.hierarchicalLayout.direction)+'" step="1" style="width:300px" id="graph_H_direction"></td><td>4</td><td><input value="'+this.constants.hierarchicalLayout.direction+'" id="graph_H_direction_value" style="width:60px"></td></tr><tr><td width="150px">levelSeparation</td><td>1</td><td><input type="range" min="0" max="500" value="'+this.constants.hierarchicalLayout.levelSeparation+'" step="1" style="width:300px" id="graph_H_levsep"></td><td>500</td><td><input value="'+this.constants.hierarchicalLayout.levelSeparation+'" id="graph_H_levsep_value" style="width:60px"></td></tr><tr><td width="150px">nodeSpacing</td><td>1</td><td><input type="range" min="0" max="500" value="'+this.constants.hierarchicalLayout.nodeSpacing+'" step="1" style="width:300px" 
id="graph_H_nspac"></td><td>500</td><td><input value="'+this.constants.hierarchicalLayout.nodeSpacing+'" id="graph_H_nspac_value" style="width:60px"></td></tr></table><table><tr><td><b>Options:</b></td></tr><tr><td width="180px"><input type="button" id="graph_toggleSmooth" value="Toggle smoothCurves" style="width:150px"></td><td width="180px"><input type="button" id="graph_repositionNodes" value="Reinitialize" style="width:150px"></td><td width="180px"><input type="button" id="graph_generateOptions" value="Generate Options" style="width:150px"></td></tr></table>',this.containerElement.parentElement.insertBefore(this.physicsConfiguration,this.containerElement),this.optionsDiv=document.createElement("div"),this.optionsDiv.style.fontSize="14px",this.optionsDiv.style.fontFamily="verdana",this.containerElement.parentElement.insertBefore(this.optionsDiv,this.containerElement); var e;e=document.getElementById("graph_BH_gc"),e.onchange=a.bind(this,"graph_BH_gc",-1,"physics_barnesHut_gravitationalConstant"),e=document.getElementById("graph_BH_cg"),e.onchange=a.bind(this,"graph_BH_cg",1,"physics_centralGravity"),e=document.getElementById("graph_BH_sc"),e.onchange=a.bind(this,"graph_BH_sc",1,"physics_springConstant"),e=document.getElementById("graph_BH_sl"),e.onchange=a.bind(this,"graph_BH_sl",1,"physics_springLength"),e=document.getElementById("graph_BH_damp"),e.onchange=a.bind(this,"graph_BH_damp",1,"physics_damping"),e=document.getElementById("graph_R_nd"),e.onchange=a.bind(this,"graph_R_nd",1,"physics_repulsion_nodeDistance"),e=document.getElementById("graph_R_cg"),e.onchange=a.bind(this,"graph_R_cg",1,"physics_centralGravity"),e=document.getElementById("graph_R_sc"),e.onchange=a.bind(this,"graph_R_sc",1,"physics_springConstant"),e=document.getElementById("graph_R_sl"),e.onchange=a.bind(this,"graph_R_sl",1,"physics_springLength"),e=document.getElementById("graph_R_damp"),e.onchange=a.bind(this,"graph_R_damp",1,"physics_damping"),e=document.getElementById("graph_H_nd"),e.onchange=a.bind(this,"graph_H_nd",1,"physics_hierarchicalRepulsion_nodeDistance"),e=document.getElementById("graph_H_cg"),e.onchange=a.bind(this,"graph_H_cg",1,"physics_centralGravity"),e=document.getElementById("graph_H_sc"),e.onchange=a.bind(this,"graph_H_sc",1,"physics_springConstant"),e=document.getElementById("graph_H_sl"),e.onchange=a.bind(this,"graph_H_sl",1,"physics_springLength"),e=document.getElementById("graph_H_damp"),e.onchange=a.bind(this,"graph_H_damp",1,"physics_damping"),e=document.getElementById("graph_H_direction"),e.onchange=a.bind(this,"graph_H_direction",t,"hierarchicalLayout_direction"),e=document.getElementById("graph_H_levsep"),e.onchange=a.bind(this,"graph_H_levsep",1,"hierarchicalLayout_levelSeparation"),e=document.getElementById("graph_H_nspac"),e.onchange=a.bind(this,"graph_H_nspac",1,"hierarchicalLayout_nodeSpacing");var i=document.getElementById("graph_physicsMethod1"),d=document.getElementById("graph_physicsMethod2"),l=document.getElementById("graph_physicsMethod3");d.checked=!0,this.constants.physics.barnesHut.enabled&&(i.checked=!0),this.constants.hierarchicalLayout.enabled&&(l.checked=!0);var 
c=document.getElementById("graph_toggleSmooth"),p=document.getElementById("graph_repositionNodes"),u=document.getElementById("graph_generateOptions");c.onclick=s.bind(this),p.onclick=o.bind(this),u.onclick=n.bind(this),c.style.background=1==this.constants.smoothCurves&&0==this.constants.dynamicSmoothCurves?"#A4FF56":"#FF8532",r.apply(this),i.onchange=r.bind(this),d.onchange=r.bind(this),l.onchange=r.bind(this)}},e._overWriteGraphConstants=function(t,e){var i=t.split("_");1==i.length?this.constants[i[0]]=e:2==i.length?this.constants[i[0]][i[1]]=e:3==i.length&&(this.constants[i[0]][i[1]][i[2]]=e)}},function(t){function e(t){throw new Error("Cannot find module '"+t+"'.")}e.keys=function(){return[]},e.resolve=e,t.exports=e,e.id=68},function(t,e){e._calculateNodeForces=function(){var t,e,i,s,o,n,r,a,h,d,l,c=this.calculationNodes,p=this.calculationNodeIndices,u=-2/3,m=4/3,f=this.constants.physics.repulsion.nodeDistance,g=f;for(d=0;d<p.length-1;d++)for(a=c[p[d]],l=d+1;l<p.length;l++){h=c[p[l]],n=a.clusterSize+h.clusterSize-2,t=h.x-a.x,e=h.y-a.y,i=Math.sqrt(t*t+e*e),0==i&&(i=.1*Math.random(),t=i),g=0==n?f:f*(1+n*this.constants.clustering.distanceAmplification);var v=u/g;2*g>i&&(r=.5*g>i?1:v*i+m,r*=0==n?1:1+n*this.constants.clustering.forceAmplification,r/=Math.max(i,.01*g),s=t*r,o=e*r,a.fx-=s,a.fy-=o,h.fx+=s,h.fy+=o)}}},function(t,e){e._calculateNodeForces=function(){var t,e,i,s,o,n,r,a,h,d,l=this.calculationNodes,c=this.calculationNodeIndices,p=this.constants.physics.hierarchicalRepulsion.nodeDistance;for(h=0;h<c.length-1;h++)for(r=l[c[h]],d=h+1;d<c.length;d++)if(a=l[c[d]],r.level==a.level){t=a.x-r.x,e=a.y-r.y,i=Math.sqrt(t*t+e*e);var u=.05;n=p>i?-Math.pow(u*i,2)+Math.pow(u*p,2):0,0==i?i=.01:n/=i,s=t*n,o=e*n,r.fx-=s,r.fy-=o,a.fx+=s,a.fy+=o}},e._calculateHierarchicalSpringForces=function(){for(var t,e,i,s,o,n,r,a,h,d=this.edges,l=this.calculationNodes,c=this.calculationNodeIndices,p=0;p<c.length;p++){var u=l[c[p]];u.springFx=0,u.springFy=0}for(i in d)if(d.hasOwnProperty(i)&&(e=d[i],e.connected&&this.nodes.hasOwnProperty(e.toId)&&this.nodes.hasOwnProperty(e.fromId)))if(t=e.physics.springLength,t+=(e.to.clusterSize+e.from.clusterSize-2)*this.constants.clustering.edgeGrowth,s=e.from.x-e.to.x,o=e.from.y-e.to.y,h=Math.sqrt(s*s+o*o),0==h&&(h=.01),a=this.constants.physics.springConstant*(t-h)/h,n=s*a,r=o*a,e.to.level!=e.from.level)e.to.springFx-=n,e.to.springFy-=r,e.from.springFx+=n,e.from.springFy+=r;else{var m=.5;e.to.fx-=m*n,e.to.fy-=m*r,e.from.fx+=m*n,e.from.fy+=m*r}var f,g,a=1;for(p=0;p<c.length;p++){var v=l[c[p]];f=Math.min(a,Math.max(-a,v.springFx)),g=Math.min(a,Math.max(-a,v.springFy)),v.fx+=f,v.fy+=g}var y=0,b=0;for(p=0;p<c.length;p++){var v=l[c[p]];y+=v.fx,b+=v.fy}var _=y/c.length,x=b/c.length;for(p=0;p<c.length;p++){var v=l[c[p]];v.fx-=_,v.fy-=x}}},function(t,e){e._calculateNodeForces=function(){if(0!=this.constants.physics.barnesHut.gravitationalConstant){var t,e=this.calculationNodes,i=this.calculationNodeIndices,s=i.length;this._formBarnesHutTree(e,i);for(var o=this.barnesHutTree,n=0;s>n;n++)t=e[i[n]],t.options.mass>0&&(this._getForceContribution(o.root.children.NW,t),this._getForceContribution(o.root.children.NE,t),this._getForceContribution(o.root.children.SW,t),this._getForceContribution(o.root.children.SE,t))}},e._getForceContribution=function(t,e){if(t.childrenCount>0){var i,s,o;if(i=t.centerOfMass.x-e.x,s=t.centerOfMass.y-e.y,o=Math.sqrt(i*i+s*s),o*t.calcSize>this.constants.physics.barnesHut.thetaInverted){0==o&&(o=.1*Math.random(),i=o);var 
n=this.constants.physics.barnesHut.gravitationalConstant*t.mass*e.options.mass/(o*o*o),r=i*n,a=s*n;e.fx+=r,e.fy+=a}else if(4==t.childrenCount)this._getForceContribution(t.children.NW,e),this._getForceContribution(t.children.NE,e),this._getForceContribution(t.children.SW,e),this._getForceContribution(t.children.SE,e);else if(t.children.data.id!=e.id){0==o&&(o=.5*Math.random(),i=o);var n=this.constants.physics.barnesHut.gravitationalConstant*t.mass*e.options.mass/(o*o*o),r=i*n,a=s*n;e.fx+=r,e.fy+=a}}},e._formBarnesHutTree=function(t,e){for(var i,s=e.length,o=Number.MAX_VALUE,n=Number.MAX_VALUE,r=-Number.MAX_VALUE,a=-Number.MAX_VALUE,h=0;s>h;h++){var d=t[e[h]].x,l=t[e[h]].y;t[e[h]].options.mass>0&&(o>d&&(o=d),d>r&&(r=d),n>l&&(n=l),l>a&&(a=l))}var c=Math.abs(r-o)-Math.abs(a-n);c>0?(n-=.5*c,a+=.5*c):(o+=.5*c,r-=.5*c);var p=1e-5,u=Math.max(p,Math.abs(r-o)),m=.5*u,f=.5*(o+r),g=.5*(n+a),v={root:{centerOfMass:{x:0,y:0},mass:0,range:{minX:f-m,maxX:f+m,minY:g-m,maxY:g+m},size:u,calcSize:1/u,children:{data:null},maxWidth:0,level:0,childrenCount:4}};for(this._splitBranch(v.root),h=0;s>h;h++)i=t[e[h]],i.options.mass>0&&this._placeInTree(v.root,i);this.barnesHutTree=v},e._updateBranchMass=function(t,e){var i=t.mass+e.options.mass,s=1/i;t.centerOfMass.x=t.centerOfMass.x*t.mass+e.x*e.options.mass,t.centerOfMass.x*=s,t.centerOfMass.y=t.centerOfMass.y*t.mass+e.y*e.options.mass,t.centerOfMass.y*=s,t.mass=i;var o=Math.max(Math.max(e.height,e.radius),e.width);t.maxWidth=t.maxWidth<o?o:t.maxWidth},e._placeInTree=function(t,e,i){(1!=i||void 0===i)&&this._updateBranchMass(t,e),t.children.NW.range.maxX>e.x?t.children.NW.range.maxY>e.y?this._placeInRegion(t,e,"NW"):this._placeInRegion(t,e,"SW"):t.children.NW.range.maxY>e.y?this._placeInRegion(t,e,"NE"):this._placeInRegion(t,e,"SE")},e._placeInRegion=function(t,e,i){switch(t.children[i].childrenCount){case 0:t.children[i].children.data=e,t.children[i].childrenCount=1,this._updateBranchMass(t.children[i],e);break;case 1:t.children[i].children.data.x==e.x&&t.children[i].children.data.y==e.y?(e.x+=Math.random(),e.y+=Math.random()):(this._splitBranch(t.children[i]),this._placeInTree(t.children[i],e));break;case 4:this._placeInTree(t.children[i],e)}},e._splitBranch=function(t){var e=null;1==t.childrenCount&&(e=t.children.data,t.mass=0,t.centerOfMass.x=0,t.centerOfMass.y=0),t.childrenCount=4,t.children.data=null,this._insertRegion(t,"NW"),this._insertRegion(t,"NE"),this._insertRegion(t,"SW"),this._insertRegion(t,"SE"),null!=e&&this._placeInTree(t,e)},e._insertRegion=function(t,e){var i,s,o,n,r=.5*t.size;switch(e){case"NW":i=t.range.minX,s=t.range.minX+r,o=t.range.minY,n=t.range.minY+r;break;case"NE":i=t.range.minX+r,s=t.range.maxX,o=t.range.minY,n=t.range.minY+r;break;case"SW":i=t.range.minX,s=t.range.minX+r,o=t.range.minY+r,n=t.range.maxY;break;case"SE":i=t.range.minX+r,s=t.range.maxX,o=t.range.minY+r,n=t.range.maxY}t.children[e]={centerOfMass:{x:0,y:0},mass:0,range:{minX:i,maxX:s,minY:o,maxY:n},size:.5*t.size,calcSize:2*t.calcSize,children:{data:null},maxWidth:0,level:t.level+1,childrenCount:0}},e._drawTree=function(t,e){void 0!==this.barnesHutTree&&(t.lineWidth=1,this._drawBranch(this.barnesHutTree.root,t,e))},e._drawBranch=function(t,e,i){void 
0===i&&(i="#FF0000"),4==t.childrenCount&&(this._drawBranch(t.children.NW,e),this._drawBranch(t.children.NE,e),this._drawBranch(t.children.SE,e),this._drawBranch(t.children.SW,e)),e.strokeStyle=i,e.beginPath(),e.moveTo(t.range.minX,t.range.minY),e.lineTo(t.range.maxX,t.range.minY),e.stroke(),e.beginPath(),e.moveTo(t.range.maxX,t.range.minY),e.lineTo(t.range.maxX,t.range.maxY),e.stroke(),e.beginPath(),e.moveTo(t.range.maxX,t.range.maxY),e.lineTo(t.range.minX,t.range.maxY),e.stroke(),e.beginPath(),e.moveTo(t.range.minX,t.range.maxY),e.lineTo(t.range.minX,t.range.minY),e.stroke()}},function(t){t.exports=function(t){return t.webpackPolyfill||(t.deprecate=function(){},t.paths=[],t.children=[],t.webpackPolyfill=1),t}},function(t,e){(function(e){t.exports=e}).call(e,{})}])}); -//# sourceMappingURL=vis.map From 4e7360e12dc71c2391764e3596a7971b4d9d7bfc Mon Sep 17 00:00:00 2001 From: vinodkc <vinod.kc.in@gmail.com> Date: Fri, 8 May 2015 14:07:53 -0700 Subject: [PATCH 045/320] [SPARK-7489] [SPARK SHELL] Spark shell crashes when compiled with scala 2.11 Spark shell crashes when compiled with scala 2.11 and SPARK_PREPEND_CLASSES=true There is a similar Resolved JIRA issue -SPARK-7470 and a PR https://github.com/apache/spark/pull/5997 , which handled same issue only in scala 2.10 Author: vinodkc <vinod.kc.in@gmail.com> Closes #6013 from vinodkc/fix_sqlcontext_exception_scala_2.11 and squashes the following commits: 119061c [vinodkc] Spark shell crashes when compiled with scala 2.11 --- repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala index 2210fbaafeadb..f4f4b626988e9 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala @@ -88,7 +88,7 @@ object Main extends Logging { logInfo("Created sql context (with Hive support)..") } catch { - case cnf: java.lang.ClassNotFoundException => + case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError => sqlContext = new SQLContext(sparkContext) logInfo("Created sql context..") } From 31da40dfeeeab69ee7974992328e3f67046ad3da Mon Sep 17 00:00:00 2001 From: Tim Ellison <t.p.ellison@gmail.com> Date: Fri, 8 May 2015 14:08:52 -0700 Subject: [PATCH 046/320] [MINOR] Defeat early garbage collection of test suite variable The JVM is free to collect references to variables that no longer participate in a computation. This simple patch adds an operation to the variable 'rdd' to ensure it is not collected early in the test suite's explicit calls to GC. 
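The fix boils down to one extra action on the variable under test: any use of `rdd` after its last "real" computation keeps the reference reachable, so an aggressive JVM cannot collect it before the suite's explicit GC call. A minimal, self-contained sketch of the pattern (not the actual ContextCleanerSuite code; the local-mode setup below is assumed purely for illustration):

```scala
import org.apache.spark.{SparkConf, SparkContext}

object KeepAliveSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("gc-keepalive-sketch"))

    var rdd = sc.parallelize(1 to 100).map(i => (i % 10, i)).reduceByKey(_ + _)
    rdd.count()   // touch the variable so the JVM cannot collect it before this point
    // ... set up checks that depend on rdd's shuffle state still existing ...
    rdd = null    // now drop the reference on purpose
    System.gc()   // explicit GC; the dereferenced RDD may now be cleaned up

    sc.stop()
  }
}
```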
ref: http://bugs.java.com/view_bug.do?bug_id=6721588 Author: Tim Ellison <t.p.ellison@gmail.com> Closes #6010 from tellison/master and squashes the following commits: 77d1c8f [Tim Ellison] Defeat early garbage collection of test suite variable by aggressive JVMs --- core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index c7868ddcf770f..cb30e1f4e63a1 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -183,6 +183,7 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { } // Test that GC causes shuffle cleanup after dereferencing the RDD + rdd.count() // Defeat any early collection of rdd variable by the JVM val postGCTester = new CleanerTester(sc, shuffleIds = Seq(0)) rdd = null // Make RDD out of scope, so that corresponding shuffle goes out of scope runGC() From 3b0c5e71f156516fd8bbbeda70e69b487b0c1418 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Fri, 8 May 2015 14:09:39 -0700 Subject: [PATCH 047/320] [SPARK-7466] DAG visualization: fix orphan nodes Simple fix. We were comparing an option with `null`. Before: <img src="https://issues.apache.org/jira/secure/attachment/12731383/before.png" width="250px"/> After: <img src="https://issues.apache.org/jira/secure/attachment/12731384/after.png" width="250px"/> Author: Andrew Or <andrew@databricks.com> Closes #6002 from andrewor14/dag-viz-orphan-nodes and squashes the following commits: a1468dc [Andrew Or] Fix null check --- .../scala/org/apache/spark/ui/scope/RDDOperationGraph.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala index c7045c98c8a03..b470aaa676e78 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala @@ -101,7 +101,7 @@ private[ui] object RDDOperationGraph extends Logging { val node = nodes.getOrElseUpdate( rdd.id, RDDOperationNode(rdd.id, rdd.name, rdd.storageLevel != StorageLevel.NONE)) - if (rdd.scope == null) { + if (rdd.scope.isEmpty) { // This RDD has no encompassing scope, so we put it directly in the root cluster // This should happen only if an RDD is instantiated outside of a public RDD API rootCluster.attachChildNode(node) From 9042f8f3784f10f695cba6b80c054695b1c152c5 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin <vanzin@cloudera.com> Date: Fri, 8 May 2015 14:10:27 -0700 Subject: [PATCH 048/320] [MINOR] [CORE] Allow History Server to read kerberos opts from config file. Order of initialization code was wrong. Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #5998 from vanzin/hs-conf-fix and squashes the following commits: 00b6b6b [Marcelo Vanzin] [minor] [core] Allow History Server to read kerberos opts from config file. 
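On the SPARK-7466 change above: the root cause is a classic Scala pitfall -- an absent `Option` is `None`, not `null`, so a `== null` test never fires. A tiny, self-contained illustration, using a simplified `Option[String]` in place of the real `rdd.scope`:

```scala
object OptionNullPitfall extends App {
  val scope: Option[String] = None   // stands in for rdd.scope when no scope was set

  println(scope == null)   // false: the Option itself is a real object (None)
  println(scope.isEmpty)   // true: this is the check that actually detects "no scope"
}
```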
--- .../scala/org/apache/spark/deploy/history/HistoryServer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 50522e69dc519..fc5182d3692b8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -203,8 +203,8 @@ object HistoryServer extends Logging { def main(argStrings: Array[String]) { SignalLogger.register(log) - initSecurity() new HistoryServerArguments(conf, argStrings) + initSecurity() val securityManager = new SecurityManager(conf) val providerName = conf.getOption("spark.history.provider") From 5467c34c3d6538e053957b5513df218f1f5bae6b Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin <vanzin@cloudera.com> Date: Fri, 8 May 2015 14:12:58 -0700 Subject: [PATCH 049/320] [SPARK-7378] [CORE] Handle deep links to unloaded apps. The code was treating deep links as if they were attempt IDs, so for example if you tried to load "/history/app1/jobs" directly, that would fail because the code would treat "jobs" as an attempt id. This change modifies the code to try both cases - first without an attempt id, then with it, so that deep links are handled correctly. This assumes that the links in the Spark UI do not clash with the attempt id namespace, though, which is the case for YARN at least, which is the only backend that currently publishes attempt IDs. Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #5922 from vanzin/SPARK-7378 and squashes the following commits: 96f648b [Marcelo Vanzin] Fix comparison. ed3bcd4 [Marcelo Vanzin] Merge branch 'master' into SPARK-7378 23483e4 [Marcelo Vanzin] Fat fingers. b728f08 [Marcelo Vanzin] [SPARK-7378] [core] Handle deep links to unloaded apps. --- .../spark/deploy/history/HistoryServer.scala | 48 +++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index fc5182d3692b8..517cbe5176241 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -83,31 +83,27 @@ class HistoryServer( return } - val appKey = - if (parts.length == 3) { - s"${parts(1)}/${parts(2)}" - } else { - parts(1) + val appId = parts(1) + val attemptId = if (parts.length >= 3) Some(parts(2)) else None + + // Since we may have applications with multiple attempts mixed with applications with a + // single attempt, we need to try both. Try the single-attempt route first, and if an + // error is raised, then try the multiple attempt route. + if (!loadAppUi(appId, None) && (!attemptId.isDefined || !loadAppUi(appId, attemptId))) { + val msg = <div class="row-fluid">Application {appId} not found.</div> + res.setStatus(HttpServletResponse.SC_NOT_FOUND) + UIUtils.basicSparkPage(msg, "Not Found").foreach { n => + res.getWriter().write(n.toString) } + return + } // Note we don't use the UI retrieved from the cache; the cache loader above will register // the app's UI, and all we need to do is redirect the user to the same URI that was // requested, and the proper data should be served at that point. 
- try { - appCache.get(appKey) - res.sendRedirect(res.encodeRedirectURL(req.getRequestURI())) - } catch { - case e: Exception => e.getCause() match { - case nsee: NoSuchElementException => - val msg = <div class="row-fluid">Application {appKey} not found.</div> - res.setStatus(HttpServletResponse.SC_NOT_FOUND) - UIUtils.basicSparkPage(msg, "Not Found").foreach( - n => res.getWriter().write(n.toString)) - - case cause: Exception => throw cause - } - } + res.sendRedirect(res.encodeRedirectURL(req.getRequestURI())) } + // SPARK-5983 ensure TRACE is not supported protected override def doTrace(req: HttpServletRequest, res: HttpServletResponse): Unit = { res.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED) @@ -183,6 +179,20 @@ class HistoryServer( */ def getProviderConfig(): Map[String, String] = provider.getConfig() + private def loadAppUi(appId: String, attemptId: Option[String]): Boolean = { + try { + appCache.get(appId + attemptId.map { id => s"/$id" }.getOrElse("")) + true + } catch { + case e: Exception => e.getCause() match { + case nsee: NoSuchElementException => + false + + case cause: Exception => throw cause + } + } + } + } /** From 90527f560462cc2d693176bd961b02767e460e06 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Fri, 8 May 2015 14:41:16 -0700 Subject: [PATCH 050/320] [SPARK-7390] [SQL] Only merge other CovarianceCounter when its count is greater than zero JIRA: https://issues.apache.org/jira/browse/SPARK-7390 Also fix a minor typo. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #5931 from viirya/fix_covariancecounter and squashes the following commits: 352eda6 [Liang-Chi Hsieh] Only merge other CovarianceCounter when its count is greater than zero. --- .../sql/execution/stat/StatFunctions.scala | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index 386ac969f1e7d..71b7f6c2a6756 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -38,7 +38,7 @@ private[sql] object StatFunctions extends Logging { var yAvg = 0.0 // the mean of all examples seen so far in col2 var Ck = 0.0 // the co-moment after k examples var MkX = 0.0 // sum of squares of differences from the (current) mean for col1 - var MkY = 0.0 // sum of squares of differences from the (current) mean for col1 + var MkY = 0.0 // sum of squares of differences from the (current) mean for col2 var count = 0L // count of observed examples // add an example to the calculation def add(x: Double, y: Double): this.type = { @@ -55,15 +55,17 @@ private[sql] object StatFunctions extends Logging { // merge counters from other partitions. 
Formula can be found at: // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance def merge(other: CovarianceCounter): this.type = { - val totalCount = count + other.count - val deltaX = xAvg - other.xAvg - val deltaY = yAvg - other.yAvg - Ck += other.Ck + deltaX * deltaY * count / totalCount * other.count - xAvg = (xAvg * count + other.xAvg * other.count) / totalCount - yAvg = (yAvg * count + other.yAvg * other.count) / totalCount - MkX += other.MkX + deltaX * deltaX * count / totalCount * other.count - MkY += other.MkY + deltaY * deltaY * count / totalCount * other.count - count = totalCount + if (other.count > 0) { + val totalCount = count + other.count + val deltaX = xAvg - other.xAvg + val deltaY = yAvg - other.yAvg + Ck += other.Ck + deltaX * deltaY * count / totalCount * other.count + xAvg = (xAvg * count + other.xAvg * other.count) / totalCount + yAvg = (yAvg * count + other.yAvg * other.count) / totalCount + MkX += other.MkX + deltaX * deltaX * count / totalCount * other.count + MkY += other.MkY + deltaY * deltaY * count / totalCount * other.count + count = totalCount + } this } // return the sample covariance for the observed examples From 6dad76e5eba3c2925bfc9d142f31f7c2dc649886 Mon Sep 17 00:00:00 2001 From: Jacky Li <jacky.likun@huawei.com> Date: Fri, 8 May 2015 15:25:54 -0700 Subject: [PATCH 051/320] [SPARK-4699] [SQL] Make caseSensitive configurable in spark sql analyzer based on #3558 Author: Jacky Li <jacky.likun@huawei.com> Author: wangfei <wangfei1@huawei.com> Author: scwf <wangfei1@huawei.com> Closes #5806 from scwf/case and squashes the following commits: cd51712 [wangfei] fix compile d4b724f [wangfei] address michael's comment af512c7 [wangfei] fix conflicts 4ef1be7 [wangfei] fix conflicts 269cf21 [scwf] fix conflicts b73df6c [scwf] style issue 9e11752 [scwf] improve SimpleCatalystConf b35529e [scwf] minor style a3f7659 [scwf] remove unsed imports 2a56515 [scwf] fix conflicts 6db4bf5 [scwf] also fix for HiveContext 7fc4a98 [scwf] fix test case d5a9933 [wangfei] fix style eee75ba [wangfei] fix EmptyConf 6ef31cf [wangfei] revert pom changes 5d7c456 [wangfei] set CASE_SENSITIVE false in TestHive 966e719 [wangfei] set CASE_SENSITIVE false in hivecontext fd30e25 [wangfei] added override 69b3b70 [wangfei] fix AnalysisSuite 5472b08 [wangfei] fix compile issue 56034ca [wangfei] fix conflicts and improve for catalystconf 664d1e9 [Jacky Li] Merge branch 'master' of https://github.com/apache/spark into case 12eca9a [Jacky Li] solve conflict with master 39e369c [Jacky Li] fix confilct after DataFrame PR dee56e9 [Jacky Li] fix test case failure 05b09a3 [Jacky Li] fix conflict base on the latest master branch 73c16b1 [Jacky Li] fix bug in sql/hive 9bf4cc7 [Jacky Li] fix bug in catalyst 005c56d [Jacky Li] make SQLContext caseSensitivity configurable 6332e0f [Jacky Li] fix bug fcbf0d9 [Jacky Li] fix scalastyle check e7bca31 [Jacky Li] make caseSensitive configuration in Analyzer and Catalog 91b1b96 [Jacky Li] make caseSensitive configurable in Analyzer f57f15c [Jacky Li] add testcase 578d167 [Jacky Li] make caseSensitive configurable --- .../spark/sql/catalyst/CatalystConf.scala | 35 +++++++++++++++++++ .../sql/catalyst/analysis/Analyzer.scala | 16 ++++++--- .../spark/sql/catalyst/analysis/Catalog.scala | 20 ++++++----- .../sql/catalyst/analysis/AnalysisSuite.scala | 14 ++++---- .../analysis/DecimalPrecisionSuite.scala | 9 +++-- .../scala/org/apache/spark/sql/SQLConf.scala | 13 +++++-- .../org/apache/spark/sql/SQLContext.scala | 4 +-- 
.../org/apache/spark/sql/SQLQuerySuite.scala | 10 ++++++ .../spark/sql/sources/DataSourceTest.scala | 20 +++-------- .../apache/spark/sql/hive/HiveContext.scala | 5 ++- .../spark/sql/hive/HiveMetastoreCatalog.scala | 32 +++++++---------- .../apache/spark/sql/hive/test/TestHive.scala | 8 +++-- .../sql/hive/execution/HiveQuerySuite.scala | 1 - .../sql/hive/execution/SQLQuerySuite.scala | 6 ++++ .../apache/spark/sql/hive/parquetSuites.scala | 4 +-- 15 files changed, 127 insertions(+), 70 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala new file mode 100644 index 0000000000000..3f351b07b37df --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst + +private[spark] trait CatalystConf { + def caseSensitiveAnalysis: Boolean +} + +/** + * A trivial conf that is empty. Used for testing when all + * relations are already filled in and the analyser needs only to resolve attribute references. + */ +object EmptyConf extends CatalystConf { + override def caseSensitiveAnalysis: Boolean = { + throw new UnsupportedOperationException + } +} + +/** A CatalystConf that can be used for local testing. */ +case class SimpleCatalystConf(caseSensitiveAnalysis: Boolean) extends CatalystConf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ecbac57ea4d62..a4c61149dd975 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -19,19 +19,21 @@ package org.apache.spark.sql.catalyst.analysis import scala.collection.mutable.ArrayBuffer -import org.apache.spark.util.collection.OpenHashSet import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.{SimpleCatalystConf, CatalystConf} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.types._ +import org.apache.spark.util.collection.OpenHashSet /** * A trivial [[Analyzer]] with an [[EmptyCatalog]] and [[EmptyFunctionRegistry]]. Used for testing * when all relations are already filled in and the analyzer needs only to resolve attribute * references. 
*/ -object SimpleAnalyzer extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, true) +object SimpleAnalyzer + extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, new SimpleCatalystConf(true)) /** * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and @@ -41,11 +43,17 @@ object SimpleAnalyzer extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, true class Analyzer( catalog: Catalog, registry: FunctionRegistry, - caseSensitive: Boolean, + conf: CatalystConf, maxIterations: Int = 100) extends RuleExecutor[LogicalPlan] with HiveTypeCoercion with CheckAnalysis { - val resolver = if (caseSensitive) caseSensitiveResolution else caseInsensitiveResolution + def resolver: Resolver = { + if (conf.caseSensitiveAnalysis) { + caseSensitiveResolution + } else { + caseInsensitiveResolution + } + } val fixedPoint = FixedPoint(maxIterations) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala index 18c24b651921a..208021c421326 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.analysis import scala.collection.mutable +import org.apache.spark.sql.catalyst.CatalystConf +import org.apache.spark.sql.catalyst.EmptyConf import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery} /** @@ -34,7 +36,7 @@ class NoSuchDatabaseException extends Exception */ trait Catalog { - def caseSensitive: Boolean + val conf: CatalystConf def tableExists(tableIdentifier: Seq[String]): Boolean @@ -57,10 +59,10 @@ trait Catalog { def unregisterAllTables(): Unit protected def processTableIdentifier(tableIdentifier: Seq[String]): Seq[String] = { - if (!caseSensitive) { - tableIdentifier.map(_.toLowerCase) - } else { + if (conf.caseSensitiveAnalysis) { tableIdentifier + } else { + tableIdentifier.map(_.toLowerCase) } } @@ -78,7 +80,7 @@ trait Catalog { } } -class SimpleCatalog(val caseSensitive: Boolean) extends Catalog { +class SimpleCatalog(val conf: CatalystConf) extends Catalog { val tables = new mutable.HashMap[String, LogicalPlan]() override def registerTable( @@ -164,10 +166,10 @@ trait OverrideCatalog extends Catalog { } abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = { - val dbName = if (!caseSensitive) { - if (databaseName.isDefined) Some(databaseName.get.toLowerCase) else None - } else { + val dbName = if (conf.caseSensitiveAnalysis) { databaseName + } else { + if (databaseName.isDefined) Some(databaseName.get.toLowerCase) else None } val temporaryTables = overrides.filter { @@ -207,7 +209,7 @@ trait OverrideCatalog extends Catalog { */ object EmptyCatalog extends Catalog { - override val caseSensitive: Boolean = true + override val conf: CatalystConf = EmptyConf override def tableExists(tableIdentifier: Seq[String]): Boolean = { throw new UnsupportedOperationException diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 971e1ff5ec2b8..6f2f35564d12e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -23,24 +23,26 @@ import 
org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types._ - +import org.apache.spark.sql.catalyst.SimpleCatalystConf import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ class AnalysisSuite extends FunSuite with BeforeAndAfter { - val caseSensitiveCatalog = new SimpleCatalog(true) - val caseInsensitiveCatalog = new SimpleCatalog(false) + val caseSensitiveConf = new SimpleCatalystConf(true) + val caseInsensitiveConf = new SimpleCatalystConf(false) + + val caseSensitiveCatalog = new SimpleCatalog(caseSensitiveConf) + val caseInsensitiveCatalog = new SimpleCatalog(caseInsensitiveConf) val caseSensitiveAnalyzer = - new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitive = true) { + new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitiveConf) { override val extendedResolutionRules = EliminateSubQueries :: Nil } val caseInsensitiveAnalyzer = - new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseSensitive = false) { + new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseInsensitiveConf) { override val extendedResolutionRules = EliminateSubQueries :: Nil } - def caseSensitiveAnalyze(plan: LogicalPlan): Unit = caseSensitiveAnalyzer.checkAnalysis(caseSensitiveAnalyzer.execute(plan)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala index 36b03d1c65e28..565b1cfe019c7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala @@ -17,14 +17,17 @@ package org.apache.spark.sql.catalyst.analysis +import org.scalatest.{BeforeAndAfter, FunSuite} + import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Union, Project, LocalRelation} import org.apache.spark.sql.types._ -import org.scalatest.{BeforeAndAfter, FunSuite} +import org.apache.spark.sql.catalyst.SimpleCatalystConf class DecimalPrecisionSuite extends FunSuite with BeforeAndAfter { - val catalog = new SimpleCatalog(false) - val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, caseSensitive = false) + val conf = new SimpleCatalystConf(true) + val catalog = new SimpleCatalog(conf) + val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, conf) val relation = LocalRelation( AttributeReference("i", IntegerType)(), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index bfaddd0f2ce1b..98a75bb4ed2df 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -17,10 +17,12 @@ package org.apache.spark.sql +import java.util.Properties + import scala.collection.immutable import scala.collection.JavaConversions._ -import java.util.Properties +import org.apache.spark.sql.catalyst.CatalystConf private[spark] object SQLConf { val COMPRESS_CACHED = "spark.sql.inMemoryColumnarStorage.compressed" @@ -32,6 +34,7 @@ private[spark] object SQLConf { val CODEGEN_ENABLED = "spark.sql.codegen" val UNSAFE_ENABLED = "spark.sql.unsafe.enabled" val DIALECT = "spark.sql.dialect" + val CASE_SENSITIVE = "spark.sql.caseSensitive" val 
PARQUET_BINARY_AS_STRING = "spark.sql.parquet.binaryAsString" val PARQUET_INT96_AS_TIMESTAMP = "spark.sql.parquet.int96AsTimestamp" @@ -89,7 +92,8 @@ private[spark] object SQLConf { * * SQLConf is thread-safe (internally synchronized, so safe to be used in multiple threads). */ -private[sql] class SQLConf extends Serializable { + +private[sql] class SQLConf extends Serializable with CatalystConf { import SQLConf._ /** Only low degree of contention is expected for conf, thus NOT using ConcurrentHashMap. */ @@ -158,6 +162,11 @@ private[sql] class SQLConf extends Serializable { */ private[spark] def codegenEnabled: Boolean = getConf(CODEGEN_ENABLED, "false").toBoolean + /** + * caseSensitive analysis true by default + */ + def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, "true").toBoolean + /** * When set to true, Spark SQL will use managed memory for certain operations. This option only * takes effect if codegen is enabled. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 0ac0936f0f592..28fc9d04436f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -148,7 +148,7 @@ class SQLContext(@transient val sparkContext: SparkContext) // TODO how to handle the temp table per user session? @transient - protected[sql] lazy val catalog: Catalog = new SimpleCatalog(true) + protected[sql] lazy val catalog: Catalog = new SimpleCatalog(conf) // TODO how to handle the temp function per user session? @transient @@ -156,7 +156,7 @@ class SQLContext(@transient val sparkContext: SparkContext) @transient protected[sql] lazy val analyzer: Analyzer = - new Analyzer(catalog, functionRegistry, caseSensitive = true) { + new Analyzer(catalog, functionRegistry, conf) { override val extendedResolutionRules = ExtractPythonUdfs :: sources.PreInsertCastAndRename :: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 77be3b8b206c0..b44eb223c80c5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -22,6 +22,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.execution.GeneratedAggregate import org.apache.spark.sql.functions._ +import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.TestData._ import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext.{udf => _, _} @@ -1277,6 +1278,15 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { checkAnswer(sql("SELECT COUNT(DISTINCT key,value) FROM distinctData"), Row(2)) } + test("SPARK-4699 case sensitivity SQL query") { + setConf(SQLConf.CASE_SENSITIVE, "false") + val data = TestData(1, "val_1") :: TestData(2, "val_2") :: Nil + val rdd = sparkContext.parallelize((0 to 1).map(i => data(i))) + rdd.toDF().registerTempTable("testTable1") + checkAnswer(sql("SELECT VALUE FROM TESTTABLE1 where KEY = 1"), Row("val_1")) + setConf(SQLConf.CASE_SENSITIVE, "true") + } + test("SPARK-6145: ORDER BY test for nested fields") { jsonRDD(sparkContext.makeRDD("""{"a": {"b": 1, "a": {"a": 1}}, "c": [{"d": 1}]}""" :: Nil)) .registerTempTable("nestedOrder") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala index 33c67355967dd..9d3090c19b4e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala @@ -18,25 +18,13 @@ package org.apache.spark.sql.sources import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.analysis.Analyzer +import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.test.TestSQLContext import org.scalatest.BeforeAndAfter abstract class DataSourceTest extends QueryTest with BeforeAndAfter { - // Case sensitivity is not configurable yet, but we want to test some edge cases. - // TODO: Remove when it is configurable - implicit val caseInsensisitiveContext = new SQLContext(TestSQLContext.sparkContext) { - @transient - override protected[sql] lazy val analyzer: Analyzer = - new Analyzer(catalog, functionRegistry, caseSensitive = false) { - override val extendedResolutionRules = - PreInsertCastAndRename :: - Nil + // We want to test some edge cases. + implicit val caseInsensisitiveContext = new SQLContext(TestSQLContext.sparkContext) - override val extendedCheckRules = Seq( - sources.PreWriteCheck(catalog) - ) - } - } + caseInsensisitiveContext.setConf(SQLConf.CASE_SENSITIVE, "false") } - diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 538c6c7f0a200..3bab648e31d1f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -46,6 +46,7 @@ import org.apache.spark.sql.execution.{ExecutedCommand, ExtractPythonUdfs, Query import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.hive.execution.{DescribeHiveTableCommand, HiveNativeCommand} import org.apache.spark.sql.sources.{DDLParser, DataSourceStrategy} +import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -329,7 +330,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { /* An analyzer that uses the Hive metastore. 
*/ @transient override protected[sql] lazy val analyzer = - new Analyzer(catalog, functionRegistry, caseSensitive = false) { + new Analyzer(catalog, functionRegistry, conf) { override val extendedResolutionRules = catalog.ParquetConversions :: catalog.CreateTables :: @@ -350,6 +351,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { protected[hive] class SQLSession extends super.SQLSession { protected[sql] override lazy val conf: SQLConf = new SQLConf { override def dialect: String = getConf(SQLConf.DIALECT, "hiveql") + override def caseSensitiveAnalysis: Boolean = + getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean } /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 8fcdf3d0ab119..f5398605bc7eb 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -17,26 +17,18 @@ package org.apache.spark.sql.hive -import java.io.IOException -import java.util.{List => JList} - import com.google.common.base.Objects import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} import org.apache.hadoop.fs.Path -import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition => TPartition, Table => TTable} -import org.apache.hadoop.hive.metastore.{TableType, Warehouse} +import org.apache.hadoop.hive.metastore.api.FieldSchema +import org.apache.hadoop.hive.metastore.Warehouse import org.apache.hadoop.hive.ql.metadata._ -import org.apache.hadoop.hive.ql.plan.CreateTableDesc -import org.apache.hadoop.hive.serde.serdeConstants -import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe -import org.apache.hadoop.hive.serde2.{Deserializer, SerDeException} -import org.apache.hadoop.util.ReflectionUtils +import org.apache.hadoop.hive.serde2.Deserializer import org.apache.spark.Logging -import org.apache.spark.sql.hive.client.IsolatedClientLoader import org.apache.spark.sql.{SaveMode, AnalysisException, SQLContext} -import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NoSuchTableException, Catalog, OverrideCatalog} +import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, Catalog, OverrideCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical @@ -44,7 +36,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.parquet.{ParquetRelation2, Partition => ParquetPartition, PartitionSpec} -import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, DDLParser, LogicalRelation, ResolvedDataSource} +import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, ResolvedDataSource} import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -54,7 +46,7 @@ import scala.collection.JavaConversions._ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: HiveContext) extends Catalog with Logging { - import org.apache.spark.sql.hive.HiveMetastoreTypes._ + val conf = hive.conf /** Usages should lock on `this`. 
*/ protected[hive] lazy val hiveWarehouse = new Warehouse(hive.hiveconf) @@ -148,7 +140,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val tableProperties = new scala.collection.mutable.HashMap[String, String] tableProperties.put("spark.sql.sources.provider", provider) if (userSpecifiedSchema.isDefined) { - val threshold = hive.conf.schemaStringLengthThreshold + val threshold = conf.schemaStringLengthThreshold val schemaJsonString = userSpecifiedSchema.get.json // Split the JSON string. val parts = schemaJsonString.grouped(threshold).toSeq @@ -355,7 +347,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive // Inserting into partitioned table is not supported in Parquet data source (yet). if !relation.hiveQlTable.isPartitioned && hive.convertMetastoreParquet && - hive.conf.parquetUseDataSourceApi && + conf.parquetUseDataSourceApi && relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") => val parquetRelation = convertToParquetRelation(relation) val attributedRewrites = relation.output.zip(parquetRelation.output) @@ -366,7 +358,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive // Inserting into partitioned table is not supported in Parquet data source (yet). if !relation.hiveQlTable.isPartitioned && hive.convertMetastoreParquet && - hive.conf.parquetUseDataSourceApi && + conf.parquetUseDataSourceApi && relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") => val parquetRelation = convertToParquetRelation(relation) val attributedRewrites = relation.output.zip(parquetRelation.output) @@ -375,7 +367,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive // Read path case p @ PhysicalOperation(_, _, relation: MetastoreRelation) if hive.convertMetastoreParquet && - hive.conf.parquetUseDataSourceApi && + conf.parquetUseDataSourceApi && relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") => val parquetRelation = convertToParquetRelation(relation) val attributedRewrites = relation.output.zip(parquetRelation.output) @@ -435,7 +427,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTableUsingAsSelect( desc.name, - hive.conf.defaultDataSourceName, + conf.defaultDataSourceName, temporary = false, mode, options = Map.empty[String, String], @@ -464,7 +456,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTableUsingAsSelect( tblName, - hive.conf.defaultDataSourceName, + conf.defaultDataSourceName, temporary = false, mode, options = Map.empty[String, String], diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 1f40a5340c2ce..1598d4bd47550 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -26,15 +26,16 @@ import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContaine import org.apache.hadoop.hive.ql.metadata.Table import org.apache.hadoop.hive.ql.parse.VariableSubstitution import org.apache.hadoop.hive.ql.processors._ -import org.apache.hadoop.hive.serde2.RegexSerDe import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import 
org.apache.hadoop.hive.serde2.avro.AvroSerDe -import org.apache.spark.sql.SQLConf + +import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.CacheTableCommand import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.execution.HiveNativeCommand +import org.apache.spark.sql.SQLConf import org.apache.spark.util.Utils import org.apache.spark.{SparkConf, SparkContext} @@ -103,10 +104,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { /** Fewer partitions to speed up testing. */ protected[sql] override lazy val conf: SQLConf = new SQLConf { override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt - // TODO as in unit test, conf.clear() probably be called, all of the value will be cleared. // The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql" override def dialect: String = super.getConf(SQLConf.DIALECT, "hiveql") + override def caseSensitiveAnalysis: Boolean = + getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 7d728fe87bda7..2c9c08a9f3898 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.{SparkFiles, SparkException} import org.apache.spark.sql.{AnalysisException, DataFrame, Row} import org.apache.spark.sql.catalyst.plans.logical.Project -import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index c605f1017547a..1353802604402 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -493,6 +493,12 @@ class SQLQuerySuite extends QueryTest { } } + test("SPARK-4699 HiveContext should be case insensitive by default") { + checkAnswer( + sql("SELECT KEY FROM Src ORDER BY value"), + sql("SELECT key FROM src ORDER BY value").collect().toSeq) + } + test("SPARK-5284 Insert into Hive throws NPE when a inner complex type field has a null value") { val schema = StructType( StructField("s", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index d5dd0bf58e702..bf1121ddf0273 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -21,14 +21,12 @@ import java.io.File import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.{QueryTest, SQLConf, SaveMode} +import org.apache.spark.sql.{QueryTest, SQLConf} import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.execution.{ExecutedCommand, PhysicalRDD} import org.apache.spark.sql.hive.execution.HiveTableScan -import org.apache.spark.sql.hive.test.TestHive import 
org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ -import org.apache.spark.sql.json.JSONRelation import org.apache.spark.sql.sources.{InsertIntoDataSource, LogicalRelation} import org.apache.spark.sql.parquet.{ParquetRelation2, ParquetTableScan} import org.apache.spark.sql.SaveMode From 35c9599b94de759204ed33cdd46d8ee108bccd86 Mon Sep 17 00:00:00 2001 From: Yanbo Liang <ybliang8@gmail.com> Date: Fri, 8 May 2015 15:48:39 -0700 Subject: [PATCH 052/320] [SPARK-5913] [MLLIB] Python API for ChiSqSelector Add a Python API for mllib.feature.ChiSqSelector https://issues.apache.org/jira/browse/SPARK-5913 Author: Yanbo Liang <ybliang8@gmail.com> Closes #5939 from yanboliang/spark-5913 and squashes the following commits: cdaac99 [Yanbo Liang] Python API for ChiSqSelector --- .../mllib/api/python/PythonMLLibAPI.scala | 10 ++++ python/pyspark/mllib/feature.py | 59 ++++++++++++++++++- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 426306d78c1c3..8c30ad4b391ae 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -494,6 +494,16 @@ private[python] class PythonMLLibAPI extends Serializable { new StandardScaler(withMean, withStd).fit(data.rdd) } + /** + * Java stub for ChiSqSelector.fit(). This stub returns a + * handle to the Java object instead of the content of the Java object. + * Extra care needs to be taken in the Python code to ensure it gets freed on + * exit; see the Py4J documentation. + */ + def fitChiSqSelector(numTopFeatures: Int, data: JavaRDD[LabeledPoint]): ChiSqSelectorModel = { + new ChiSqSelector(numTopFeatures).fit(data.rdd) + } + /** * Java stub for IDF.fit(). This stub returns a * handle to the Java object instead of the content of the Java object. diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 1140539a24e95..aac305db6c19a 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -33,10 +33,12 @@ from pyspark import SparkContext from pyspark.rdd import RDD, ignore_unicode_prefix from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper -from pyspark.mllib.linalg import Vectors, _convert_to_vector +from pyspark.mllib.linalg import Vectors, DenseVector, SparseVector, _convert_to_vector +from pyspark.mllib.regression import LabeledPoint __all__ = ['Normalizer', 'StandardScalerModel', 'StandardScaler', - 'HashingTF', 'IDFModel', 'IDF', 'Word2Vec', 'Word2VecModel'] + 'HashingTF', 'IDFModel', 'IDF', 'Word2Vec', 'Word2VecModel', + 'ChiSqSelector', 'ChiSqSelectorModel'] class VectorTransformer(object): @@ -199,6 +201,59 @@ def fit(self, dataset): return StandardScalerModel(jmodel) +class ChiSqSelectorModel(JavaVectorTransformer): + """ + .. note:: Experimental + + Represents a Chi Squared selector model. + """ + def transform(self, vector): + """ + Applies transformation on a vector. + + :param vector: Vector or RDD of Vector to be transformed. + :return: transformed vector. + """ + return JavaVectorTransformer.transform(self, vector) + + +class ChiSqSelector(object): + """ + .. note:: Experimental + + Creates a ChiSquared feature selector. + + >>> data = [ + ... LabeledPoint(0.0, SparseVector(3, {0: 8.0, 1: 7.0})), + ... LabeledPoint(1.0, SparseVector(3, {1: 9.0, 2: 6.0})), + ... 
LabeledPoint(1.0, [0.0, 9.0, 8.0]), + ... LabeledPoint(2.0, [8.0, 9.0, 5.0]) + ... ] + >>> model = ChiSqSelector(1).fit(sc.parallelize(data)) + >>> model.transform(SparseVector(3, {1: 9.0, 2: 6.0})) + SparseVector(1, {0: 6.0}) + >>> model.transform(DenseVector([8.0, 9.0, 5.0])) + DenseVector([5.0]) + """ + def __init__(self, numTopFeatures): + """ + :param numTopFeatures: number of features that selector will select. + """ + self.numTopFeatures = int(numTopFeatures) + + def fit(self, data): + """ + Returns a ChiSquared feature selector. + + :param data: an `RDD[LabeledPoint]` containing the labeled dataset + with categorical features. Real-valued features will be + treated as categorical for each distinct value. + Apply feature discretizer before using this function. + """ + jmodel = callMLlibFunc("fitChiSqSelector", self.numTopFeatures, data) + return ChiSqSelectorModel(jmodel) + + class HashingTF(object): """ .. note:: Experimental From 1c78f6866ebbcfb41d9875bfa3c0b9fa23b188bf Mon Sep 17 00:00:00 2001 From: Brendan Collins <bcollins@blueraster.com> Date: Fri, 8 May 2015 15:59:34 -0700 Subject: [PATCH 053/320] updated ec2 instance types I needed to run some d2 instances, so I updated the spark_ec2.py accordingly Author: Brendan Collins <bcollins@blueraster.com> Closes #6014 from brendancol/ec2-instance-types-update and squashes the following commits: d7b4191 [Brendan Collins] Merge branch 'ec2-instance-types-update' of github.com:brendancol/spark into ec2-instance-types-update 6366c45 [Brendan Collins] added back cc1.4xlarge fc2931f [Brendan Collins] updated ec2 instance types 80c2aa6 [Brendan Collins] vertically aligned whitespace 85c6236 [Brendan Collins] vertically aligned whitespace 1657c26 [Brendan Collins] updated ec2 instance types --- ec2/spark_ec2.py | 70 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 87c0818279713..ab4a96f232c13 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -347,46 +347,57 @@ def get_validate_spark_version(version, repo): # Source: http://aws.amazon.com/amazon-linux-ami/instance-type-matrix/ -# Last Updated: 2014-06-20 +# Last Updated: 2015-05-08 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. 
EC2_INSTANCE_TYPES = { "c1.medium": "pvm", "c1.xlarge": "pvm", + "c3.large": "pvm", + "c3.xlarge": "pvm", "c3.2xlarge": "pvm", "c3.4xlarge": "pvm", "c3.8xlarge": "pvm", - "c3.large": "pvm", - "c3.xlarge": "pvm", + "c4.large": "hvm", + "c4.xlarge": "hvm", + "c4.2xlarge": "hvm", + "c4.4xlarge": "hvm", + "c4.8xlarge": "hvm", "cc1.4xlarge": "hvm", "cc2.8xlarge": "hvm", "cg1.4xlarge": "hvm", "cr1.8xlarge": "hvm", + "d2.xlarge": "hvm", + "d2.2xlarge": "hvm", + "d2.4xlarge": "hvm", + "d2.8xlarge": "hvm", + "g2.2xlarge": "hvm", + "g2.8xlarge": "hvm", "hi1.4xlarge": "pvm", "hs1.8xlarge": "pvm", + "i2.xlarge": "hvm", "i2.2xlarge": "hvm", "i2.4xlarge": "hvm", "i2.8xlarge": "hvm", - "i2.xlarge": "hvm", - "m1.large": "pvm", - "m1.medium": "pvm", "m1.small": "pvm", + "m1.medium": "pvm", + "m1.large": "pvm", "m1.xlarge": "pvm", + "m2.xlarge": "pvm", "m2.2xlarge": "pvm", "m2.4xlarge": "pvm", - "m2.xlarge": "pvm", - "m3.2xlarge": "hvm", - "m3.large": "hvm", "m3.medium": "hvm", + "m3.large": "hvm", "m3.xlarge": "hvm", + "m3.2xlarge": "hvm", + "r3.large": "hvm", + "r3.xlarge": "hvm", "r3.2xlarge": "hvm", "r3.4xlarge": "hvm", "r3.8xlarge": "hvm", - "r3.large": "hvm", - "r3.xlarge": "hvm", "t1.micro": "pvm", - "t2.medium": "hvm", "t2.micro": "hvm", "t2.small": "hvm", + "t2.medium": "hvm", } @@ -878,44 +889,57 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state): # Get number of local disks available for a given EC2 instance type. def get_num_disks(instance_type): # Source: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html - # Last Updated: 2014-06-20 + # Last Updated: 2015-05-08 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. disks_by_instance = { "c1.medium": 1, "c1.xlarge": 4, + "c3.large": 2, + "c3.xlarge": 2, "c3.2xlarge": 2, "c3.4xlarge": 2, "c3.8xlarge": 2, - "c3.large": 2, - "c3.xlarge": 2, + "c4.large": 0, + "c4.xlarge": 0, + "c4.2xlarge": 0, + "c4.4xlarge": 0, + "c4.8xlarge": 0, "cc1.4xlarge": 2, "cc2.8xlarge": 4, "cg1.4xlarge": 2, "cr1.8xlarge": 2, + "d2.xlarge": 3, + "d2.2xlarge": 6, + "d2.4xlarge": 12, + "d2.8xlarge": 24, "g2.2xlarge": 1, + "g2.8xlarge": 2, "hi1.4xlarge": 2, "hs1.8xlarge": 24, + "i2.xlarge": 1, "i2.2xlarge": 2, "i2.4xlarge": 4, "i2.8xlarge": 8, - "i2.xlarge": 1, - "m1.large": 2, - "m1.medium": 1, "m1.small": 1, + "m1.medium": 1, + "m1.large": 2, "m1.xlarge": 4, + "m2.xlarge": 1, "m2.2xlarge": 1, "m2.4xlarge": 2, - "m2.xlarge": 1, - "m3.2xlarge": 2, - "m3.large": 1, "m3.medium": 1, + "m3.large": 1, "m3.xlarge": 2, + "m3.2xlarge": 2, + "r3.large": 1, + "r3.xlarge": 1, "r3.2xlarge": 1, "r3.4xlarge": 1, "r3.8xlarge": 2, - "r3.large": 1, - "r3.xlarge": 1, "t1.micro": 0, + "t2.micro": 0, + "t2.small": 0, + "t2.medium": 0, } if instance_type in disks_by_instance: return disks_by_instance[instance_type] From ffdc40ce7a799f2564f57b958d0f32f1d1636488 Mon Sep 17 00:00:00 2001 From: Aaron Davidson <aaron@databricks.com> Date: Fri, 8 May 2015 17:13:55 -0700 Subject: [PATCH 054/320] [SPARK-6955] Perform port retries at NettyBlockTransferService level Currently we're doing port retries in the TransportServer level, but this is not specified by the TransportContext API and it has other further-reaching impacts like causing undesirable behavior for the Yarn and Standalone shuffle services. Author: Aaron Davidson <aaron@databricks.com> Closes #5575 from aarondav/port-bind and squashes the following commits: 3c2d6ed [Aaron Davidson] Oops, never do it. 
a5d9432 [Aaron Davidson] Remove shouldHostShuffleServiceIfEnabled e901eb2 [Aaron Davidson] fix local-cluster mode for ExternalShuffleServiceSuite 59e5e38 [Aaron Davidson] [SPARK-6955] Perform port retries at NettyBlockTransferService level --- .../spark/deploy/LocalSparkCluster.scala | 4 +- .../netty/NettyBlockTransferService.scala | 14 +++- .../NettyBlockTransferServiceSuite.scala | 78 +++++++++++++++++++ .../spark/network/server/TransportServer.java | 45 +++-------- 4 files changed, 102 insertions(+), 39 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala index f0e77c2ba982b..860e1a24901b6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala @@ -48,7 +48,9 @@ class LocalSparkCluster( logInfo("Starting a local Spark cluster with " + numWorkers + " workers.") // Disable REST server on Master in this mode unless otherwise specified - val _conf = conf.clone().setIfMissing("spark.master.rest.enabled", "false") + val _conf = conf.clone() + .setIfMissing("spark.master.rest.enabled", "false") + .set("spark.shuffle.service.enabled", "false") /* Start the Master */ val (masterSystem, masterPort, _, _) = Master.startSystemAndActor(localHostname, 0, 0, _conf) diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 6181c0ee9fa2b..d650d5fe73087 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -59,12 +59,22 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage } transportContext = new TransportContext(transportConf, rpcHandler) clientFactory = transportContext.createClientFactory(clientBootstrap.toList) - server = transportContext.createServer(conf.getInt("spark.blockManager.port", 0), - serverBootstrap.toList) + server = createServer(serverBootstrap.toList) appId = conf.getAppId logInfo("Server created on " + server.getPort) } + /** Creates and binds the TransportServer, possibly trying multiple ports. */ + private def createServer(bootstraps: List[TransportServerBootstrap]): TransportServer = { + def startService(port: Int): (TransportServer, Int) = { + val server = transportContext.createServer(port, bootstraps) + (server, server.getPort) + } + + val portToTry = conf.getInt("spark.blockManager.port", 0) + Utils.startServiceOnPort(portToTry, startService, conf, getClass.getName)._1 + } + override def fetchBlocks( host: String, port: Int, diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala new file mode 100644 index 0000000000000..a41f8b7ce5ce0 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.netty + +import org.apache.spark.network.BlockDataManager +import org.apache.spark.{SecurityManager, SparkConf} +import org.mockito.Mockito.mock +import org.scalatest._ + +class NettyBlockTransferServiceSuite extends FunSuite with BeforeAndAfterEach with ShouldMatchers { + private var service0: NettyBlockTransferService = _ + private var service1: NettyBlockTransferService = _ + + override def afterEach() { + if (service0 != null) { + service0.close() + service0 = null + } + + if (service1 != null) { + service1.close() + service1 = null + } + } + + test("can bind to a random port") { + service0 = createService(port = 0) + service0.port should not be 0 + } + + test("can bind to two random ports") { + service0 = createService(port = 0) + service1 = createService(port = 0) + service0.port should not be service1.port + } + + test("can bind to a specific port") { + val port = 17634 + service0 = createService(port) + service0.port should be >= port + service0.port should be <= (port + 10) // avoid testing equality in case of simultaneous tests + } + + test("can bind to a specific port twice and the second increments") { + val port = 17634 + service0 = createService(port) + service1 = createService(port) + service0.port should be >= port + service0.port should be <= (port + 10) + service1.port should be (service0.port + 1) + } + + private def createService(port: Int): NettyBlockTransferService = { + val conf = new SparkConf() + .set("spark.app.id", s"test-${getClass.getName}") + .set("spark.blockManager.port", port.toString) + val securityManager = new SecurityManager(conf) + val blockDataManager = mock(classOf[BlockDataManager]) + val service = new NettyBlockTransferService(conf, securityManager, numCores = 1) + service.init(blockDataManager) + service + } +} diff --git a/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java b/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java index 941ef95772e16..f4fadb1ee3b8d 100644 --- a/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java +++ b/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java @@ -31,6 +31,7 @@ import io.netty.channel.ChannelOption; import io.netty.channel.EventLoopGroup; import io.netty.channel.socket.SocketChannel; +import org.apache.spark.network.util.JavaUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,7 +66,12 @@ public TransportServer( this.appRpcHandler = appRpcHandler; this.bootstraps = Lists.newArrayList(Preconditions.checkNotNull(bootstraps)); - init(portToBind); + try { + init(portToBind); + } catch (RuntimeException e) { + JavaUtils.closeQuietly(this); + throw e; + } } public int getPort() { @@ -114,7 +120,8 @@ protected void initChannel(SocketChannel ch) throws Exception { } }); - bindRightPort(portToBind); + channelFuture = bootstrap.bind(new 
InetSocketAddress(portToBind)); + channelFuture.syncUninterruptibly(); port = ((InetSocketAddress) channelFuture.channel().localAddress()).getPort(); logger.debug("Shuffle server started on port :" + port); @@ -135,38 +142,4 @@ public void close() { } bootstrap = null; } - - /** - * Attempt to bind to the specified port up to a fixed number of retries. - * If all attempts fail after the max number of retries, exit. - */ - private void bindRightPort(int portToBind) { - int maxPortRetries = conf.portMaxRetries(); - - for (int i = 0; i <= maxPortRetries; i++) { - int tryPort = -1; - if (0 == portToBind) { - // Do not increment port if tryPort is 0, which is treated as a special port - tryPort = 0; - } else { - // If the new port wraps around, do not try a privilege port - tryPort = ((portToBind + i - 1024) % (65536 - 1024)) + 1024; - } - try { - channelFuture = bootstrap.bind(new InetSocketAddress(tryPort)); - channelFuture.syncUninterruptibly(); - return; - } catch (Exception e) { - logger.warn("Netty service could not bind on port " + tryPort + - ". Attempting the next port."); - if (i >= maxPortRetries) { - logger.error(e.getMessage() + ": Netty server failed after " - + maxPortRetries + " retries."); - - // If it can't find a right port, it should exit directly. - System.exit(-1); - } - } - } - } } From bd61f07039064833108070e19b752d4c46045766 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Fri, 8 May 2015 17:15:10 -0700 Subject: [PATCH 055/320] [SPARK-7469] [SQL] DAG visualization: show SQL query operators The DAG visualization currently displays only low-level Spark primitives (e.g. `map`, `reduceByKey`, `filter` etc.). For SQL, these aren't particularly useful. Instead, we should display higher level physical operators (e.g. `Filter`, `Exchange`, `ShuffleHashJoin`). 
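As a concrete illustration of the new contract (a sketch only, not part of this patch; `ExampleFilter` and its predicate are hypothetical): a physical operator now overrides `doExecute()`, while the final `SparkPlan.execute()` wraps that call in an RDD operation scope named after the operator (via `RDDOperationScope.withScope(sparkContext, nodeName, false, true)`), and that scope name is what the DAG visualization displays.

    import org.apache.spark.rdd.RDD
    import org.apache.spark.sql.Row
    import org.apache.spark.sql.catalyst.expressions.Attribute
    import org.apache.spark.sql.execution.SparkPlan

    // Hypothetical operator: only doExecute() is defined here; the inherited,
    // final execute() tags every RDD created below with an "ExampleFilter" scope.
    case class ExampleFilter(predicate: Row => Boolean, child: SparkPlan) extends SparkPlan {
      override def children: Seq[SparkPlan] = child :: Nil
      override def output: Seq[Attribute] = child.output

      protected override def doExecute(): RDD[Row] =
        child.execute().mapPartitions(iter => iter.filter(predicate))
    }
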
cc marmbrus ----------------- **Before** <img src="https://issues.apache.org/jira/secure/attachment/12731586/before.png" width="600px"/> ----------------- **After** (Pay attention to the words) <img src="https://issues.apache.org/jira/secure/attachment/12731587/after.png" width="600px"/> ----------------- Author: Andrew Or <andrew@databricks.com> Closes #5999 from andrewor14/dag-viz-sql and squashes the following commits: 0db23a4 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-sql 1e211db [Andrew Or] Update comment 0d49fd6 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-sql ffd237a [Andrew Or] Fix style 202dac1 [Andrew Or] Make ignoreParent false by default e61b1ab [Andrew Or] Visualize SQL operators, not low-level Spark primitives 569034a [Andrew Or] Add a flag to ignore parent settings and scopes --- .../apache/spark/rdd/RDDOperationScope.scala | 20 +++++++++----- .../columnar/InMemoryColumnarTableScan.scala | 2 +- .../spark/sql/execution/Aggregate.scala | 2 +- .../apache/spark/sql/execution/Exchange.scala | 2 +- .../spark/sql/execution/ExistingRDD.scala | 2 +- .../apache/spark/sql/execution/Expand.scala | 2 +- .../apache/spark/sql/execution/Generate.scala | 2 +- .../sql/execution/GeneratedAggregate.scala | 2 +- .../spark/sql/execution/LocalTableScan.scala | 2 +- .../spark/sql/execution/SparkPlan.scala | 19 +++++++++++--- .../apache/spark/sql/execution/Window.scala | 2 +- .../spark/sql/execution/basicOperators.scala | 26 +++++++++---------- .../apache/spark/sql/execution/commands.scala | 2 +- .../spark/sql/execution/debug/package.scala | 4 +-- .../execution/joins/BroadcastHashJoin.scala | 2 +- .../joins/BroadcastLeftSemiJoinHash.scala | 2 +- .../joins/BroadcastNestedLoopJoin.scala | 2 +- .../execution/joins/CartesianProduct.scala | 2 +- .../sql/execution/joins/HashOuterJoin.scala | 2 +- .../sql/execution/joins/LeftSemiJoinBNL.scala | 2 +- .../execution/joins/LeftSemiJoinHash.scala | 2 +- .../execution/joins/ShuffledHashJoin.scala | 2 +- .../sql/execution/joins/SortMergeJoin.scala | 2 +- .../spark/sql/execution/pythonUdfs.scala | 2 +- .../sql/parquet/ParquetTableOperations.scala | 4 +-- .../sql/hive/execution/HiveTableScan.scala | 2 +- .../hive/execution/InsertIntoHiveTable.scala | 4 ++- .../hive/execution/ScriptTransformation.scala | 2 +- 28 files changed, 71 insertions(+), 50 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala index 9440d456edf15..93ec606f2de7d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala @@ -102,16 +102,21 @@ private[spark] object RDDOperationScope { /** * Execute the given body such that all RDDs created in this body will have the same scope. * - * If nesting is allowed, this concatenates the previous scope with the new one in a way that - * signifies the hierarchy. Otherwise, if nesting is not allowed, then any children calls to - * this method executed in the body will have no effect. + * If nesting is allowed, any subsequent calls to this method in the given body will instantiate + * child scopes that are nested within our scope. Otherwise, these calls will take no effect. + * + * Additionally, the caller of this method may optionally ignore the configurations and scopes + * set by the higher level caller. 
In this case, this method will ignore the parent caller's + * intention to disallow nesting, and the new scope instantiated will not have a parent. This + * is useful for scoping physical operations in Spark SQL, for instance. * * Note: Return statements are NOT allowed in body. */ private[spark] def withScope[T]( sc: SparkContext, name: String, - allowNesting: Boolean)(body: => T): T = { + allowNesting: Boolean, + ignoreParent: Boolean = false)(body: => T): T = { // Save the old scope to restore it later val scopeKey = SparkContext.RDD_SCOPE_KEY val noOverrideKey = SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY @@ -119,8 +124,11 @@ private[spark] object RDDOperationScope { val oldScope = Option(oldScopeJson).map(RDDOperationScope.fromJson) val oldNoOverride = sc.getLocalProperty(noOverrideKey) try { - // Set the scope only if the higher level caller allows us to do so - if (sc.getLocalProperty(noOverrideKey) == null) { + if (ignoreParent) { + // Ignore all parent settings and scopes and start afresh with our own root scope + sc.setLocalProperty(scopeKey, new RDDOperationScope(name).toJson) + } else if (sc.getLocalProperty(noOverrideKey) == null) { + // Otherwise, set the scope only if the higher level caller allows us to do so sc.setLocalProperty(scopeKey, new RDDOperationScope(name, oldScope).toJson) } // Optionally disallow the child body to override our scope diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala index d9b6fb43ab83d..0ded1cce68391 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala @@ -267,7 +267,7 @@ private[sql] case class InMemoryColumnarTableScan( private val inMemoryPartitionPruningEnabled = sqlContext.conf.inMemoryPartitionPruning - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { if (enableAccumulators) { readPartitions.setValue(0) readBatches.setValue(0) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala index 18b1ba4c5c4b9..8d16749697aa2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala @@ -121,7 +121,7 @@ case class Aggregate( } } - override def execute(): RDD[Row] = attachTree(this, "execute") { + protected override def doExecute(): RDD[Row] = attachTree(this, "execute") { if (groupingExpressions.isEmpty) { child.execute().mapPartitions { iter => val buffer = newAggregateBuffer() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala index f0d54cd6cd94f..f02fa81e95e84 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala @@ -109,7 +109,7 @@ case class Exchange( serializer } - override def execute(): RDD[Row] = attachTree(this , "execute") { + protected override def doExecute(): RDD[Row] = attachTree(this , "execute") { newPartitioning match { case HashPartitioning(expressions, numPartitions) => // TODO: Eliminate redundant expressions in grouping key and value. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index 57effbf7ec501..a500269f3cdcf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -106,7 +106,7 @@ private[sql] case class LogicalRDD(output: Seq[Attribute], rdd: RDD[Row])(sqlCon /** Physical plan node for scanning data from an RDD. */ private[sql] case class PhysicalRDD(output: Seq[Attribute], rdd: RDD[Row]) extends LeafNode { - override def execute(): RDD[Row] = rdd + protected override def doExecute(): RDD[Row] = rdd } /** Logical plan node for scanning data from a local collection. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala index 575849481faad..f16ca36909fab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala @@ -43,7 +43,7 @@ case class Expand( // as UNKNOWN partitioning override def outputPartitioning: Partitioning = UnknownPartitioning(0) - override def execute(): RDD[Row] = attachTree(this, "execute") { + protected override def doExecute(): RDD[Row] = attachTree(this, "execute") { child.execute().mapPartitions { iter => // TODO Move out projection objects creation and transfer to // workers via closure. However we can't assume the Projection diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala index 5201e20a10565..08d9079335132 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala @@ -46,7 +46,7 @@ case class Generate( val boundGenerator = BindReferences.bindReference(generator, child.output) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { if (join) { child.execute().mapPartitions { iter => val nullValues = Seq.fill(generator.elementTypes.size)(Literal(null)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala index 5d9f202681045..2ec7d4fbc92de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala @@ -66,7 +66,7 @@ case class GeneratedAggregate( override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val aggregatesToCompute = aggregateExpressions.flatMap { a => a.collect { case agg: AggregateExpression => agg} } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala index ace9af5f384c9..03bee80ad7f38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala @@ -30,7 +30,7 @@ private[sql] case class LocalTableScan(output: Seq[Attribute], rows: Seq[Row]) e private lazy val rdd = sqlContext.sparkContext.parallelize(rows) - override def execute(): RDD[Row] = rdd + protected 
override def doExecute(): RDD[Row] = rdd override def executeCollect(): Array[Row] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 59c89800da00f..435ac011178de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi import org.apache.spark.Logging -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{RDD, RDDOperationScope} import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.{CatalystTypeConverters, trees} import org.apache.spark.sql.catalyst.expressions._ @@ -79,14 +79,25 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ def requiredChildOrdering: Seq[Seq[SortOrder]] = Seq.fill(children.size)(Nil) /** - * Runs this query returning the result as an RDD. + * Returns the result of this query as an RDD[Row] by delegating to doExecute + * after adding query plan information to created RDDs for visualization. + * Concrete implementations of SparkPlan should override doExecute instead. */ - def execute(): RDD[Row] + final def execute(): RDD[Row] = { + RDDOperationScope.withScope(sparkContext, nodeName, false, true) { + doExecute() + } + } /** - * Runs this query returning the result as an array. + * Overridden by concrete implementations of SparkPlan. + * Produces the result of the query as an RDD[Row] */ + protected def doExecute(): RDD[Row] + /** + * Runs this query returning the result as an array. + */ def executeCollect(): Array[Row] = { execute().mapPartitions { iter => val converter = CatalystTypeConverters.createToScalaConverter(schema) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala index 217b559def512..c4327ce262ac5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala @@ -112,7 +112,7 @@ case class Window( } } - def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().mapPartitions { iter => new Iterator[Row] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala index 5ca11e67a9434..6cb67b4bbbb65 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala @@ -37,7 +37,7 @@ case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends @transient lazy val buildProjection = newMutableProjection(projectList, child.output) - override def execute(): RDD[Row] = child.execute().mapPartitions { iter => + protected override def doExecute(): RDD[Row] = child.execute().mapPartitions { iter => val resuableProjection = buildProjection() iter.map(resuableProjection) } @@ -54,7 +54,7 @@ case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode { @transient lazy val conditionEvaluator: (Row) => Boolean = newPredicate(condition, child.output) - override def execute(): RDD[Row] = child.execute().mapPartitions { iter => + protected override def doExecute(): RDD[Row] = child.execute().mapPartitions { iter => 
iter.filter(conditionEvaluator) } @@ -83,7 +83,7 @@ case class Sample( override def output: Seq[Attribute] = child.output // TODO: How to pick seed? - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { if (withReplacement) { child.execute().map(_.copy()).sample(withReplacement, upperBound - lowerBound, seed) } else { @@ -99,7 +99,7 @@ case class Sample( case class Union(children: Seq[SparkPlan]) extends SparkPlan { // TODO: attributes output by union should be distinct for nullability purposes override def output: Seq[Attribute] = children.head.output - override def execute(): RDD[Row] = sparkContext.union(children.map(_.execute())) + protected override def doExecute(): RDD[Row] = sparkContext.union(children.map(_.execute())) } /** @@ -124,7 +124,7 @@ case class Limit(limit: Int, child: SparkPlan) override def executeCollect(): Array[Row] = child.executeTake(limit) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val rdd: RDD[_ <: Product2[Boolean, Row]] = if (sortBasedShuffleOn) { child.execute().mapPartitions { iter => iter.take(limit).map(row => (false, row.copy())) @@ -166,7 +166,7 @@ case class TakeOrdered(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan) // TODO: Terminal split should be implemented differently from non-terminal split. // TODO: Pick num splits based on |limit|. - override def execute(): RDD[Row] = sparkContext.makeRDD(collectData(), 1) + protected override def doExecute(): RDD[Row] = sparkContext.makeRDD(collectData(), 1) override def outputOrdering: Seq[SortOrder] = sortOrder } @@ -186,7 +186,7 @@ case class Sort( override def requiredChildDistribution: Seq[Distribution] = if (global) OrderedDistribution(sortOrder) :: Nil else UnspecifiedDistribution :: Nil - override def execute(): RDD[Row] = attachTree(this, "sort") { + protected override def doExecute(): RDD[Row] = attachTree(this, "sort") { child.execute().mapPartitions( { iterator => val ordering = newOrdering(sortOrder, child.output) iterator.map(_.copy()).toArray.sorted(ordering).iterator @@ -214,7 +214,7 @@ case class ExternalSort( override def requiredChildDistribution: Seq[Distribution] = if (global) OrderedDistribution(sortOrder) :: Nil else UnspecifiedDistribution :: Nil - override def execute(): RDD[Row] = attachTree(this, "sort") { + protected override def doExecute(): RDD[Row] = attachTree(this, "sort") { child.execute().mapPartitions( { iterator => val ordering = newOrdering(sortOrder, child.output) val sorter = new ExternalSorter[Row, Null, Row](ordering = Some(ordering)) @@ -244,7 +244,7 @@ case class Distinct(partial: Boolean, child: SparkPlan) extends UnaryNode { override def requiredChildDistribution: Seq[Distribution] = if (partial) UnspecifiedDistribution :: Nil else ClusteredDistribution(child.output) :: Nil - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().mapPartitions { iter => val hashSet = new scala.collection.mutable.HashSet[Row]() @@ -270,7 +270,7 @@ case class Repartition(numPartitions: Int, shuffle: Boolean, child: SparkPlan) extends UnaryNode { override def output: Seq[Attribute] = child.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().map(_.copy()).coalesce(numPartitions, shuffle) } } @@ -285,7 +285,7 @@ case class Repartition(numPartitions: Int, shuffle: Boolean, child: SparkPlan) case class Except(left: SparkPlan, right: SparkPlan) extends BinaryNode { override def output: 
Seq[Attribute] = left.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { left.execute().map(_.copy()).subtract(right.execute().map(_.copy())) } } @@ -299,7 +299,7 @@ case class Except(left: SparkPlan, right: SparkPlan) extends BinaryNode { case class Intersect(left: SparkPlan, right: SparkPlan) extends BinaryNode { override def output: Seq[Attribute] = children.head.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { left.execute().map(_.copy()).intersection(right.execute().map(_.copy())) } } @@ -314,5 +314,5 @@ case class Intersect(left: SparkPlan, right: SparkPlan) extends BinaryNode { case class OutputFaker(output: Seq[Attribute], child: SparkPlan) extends SparkPlan { def children: Seq[SparkPlan] = child :: Nil - def execute(): RDD[Row] = child.execute() + protected override def doExecute(): RDD[Row] = child.execute() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 388a8184e4cfe..49b361e96b2d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -64,7 +64,7 @@ private[sql] case class ExecutedCommand(cmd: RunnableCommand) extends SparkPlan override def executeTake(limit: Int): Array[Row] = sideEffectResult.take(limit).toArray - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val converted = sideEffectResult.map(r => CatalystTypeConverters.convertToCatalyst(r, schema).asInstanceOf[Row]) sqlContext.sparkContext.parallelize(converted, 1) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala index 710787096e6cb..dffb265601bdb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala @@ -125,7 +125,7 @@ package object debug { } } - def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().mapPartitions { iter => new Iterator[Row] { def hasNext: Boolean = iter.hasNext @@ -193,7 +193,7 @@ package object debug { def children: List[SparkPlan] = child :: Nil - def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().map { row => try typeCheck(row, child.schema) catch { case e: Exception => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala index 926f5e6c137ee..05dd5681edfac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala @@ -66,7 +66,7 @@ case class BroadcastHashJoin( sparkContext.broadcast(hashed) } - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val broadcastRelation = Await.result(broadcastFuture, timeout) streamedPlan.execute().mapPartitions { streamedIter => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala index 3ef1e0d7fbdd4..640fc26ba3baa 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala @@ -38,7 +38,7 @@ case class BroadcastLeftSemiJoinHash( override def output: Seq[Attribute] = left.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val buildIter= buildPlan.execute().map(_.copy()).collect().toIterator val hashSet = new java.util.HashSet[Row]() var currentRow: Row = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala index 6aaf35fb429e2..caad3dfbe1c5e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala @@ -61,7 +61,7 @@ case class BroadcastNestedLoopJoin( @transient private lazy val boundCondition = newPredicate(condition.getOrElse(Literal(true)), left.output ++ right.output) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val broadcastedRelation = sparkContext.broadcast(broadcast.execute().map(_.copy()).collect().toIndexedSeq) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala index 1cbc98354d673..191c00cb55da2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.{BinaryNode, SparkPlan} case class CartesianProduct(left: SparkPlan, right: SparkPlan) extends BinaryNode { override def output: Seq[Attribute] = left.output ++ right.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val leftResults = left.execute().map(_.copy()) val rightResults = right.execute().map(_.copy()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala index a396c0f5d56ee..45574392996ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala @@ -183,7 +183,7 @@ case class HashOuterJoin( hashTable } - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val joinedRow = new JoinedRow() left.execute().zipPartitions(right.execute()) { (leftIter, rightIter) => // TODO this probably can be replaced by external sort (sort merged join?) 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala index b03af410dca08..036423e6faea4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala @@ -47,7 +47,7 @@ case class LeftSemiJoinBNL( @transient private lazy val boundCondition = newPredicate(condition.getOrElse(Literal(true)), left.output ++ right.output) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val broadcastedRelation = sparkContext.broadcast(broadcast.execute().map(_.copy()).collect().toIndexedSeq) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala index a04f2a63b5a55..8ad27eae80ffb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala @@ -42,7 +42,7 @@ case class LeftSemiJoinHash( override def output: Seq[Attribute] = left.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { buildPlan.execute().zipPartitions(streamedPlan.execute()) { (buildIter, streamIter) => val hashSet = new java.util.HashSet[Row]() var currentRow: Row = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala index a6cd8337c1c3e..219525d9d85f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala @@ -43,7 +43,7 @@ case class ShuffledHashJoin( override def requiredChildDistribution: Seq[ClusteredDistribution] = ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { buildPlan.execute().zipPartitions(streamedPlan.execute()) { (buildIter, streamIter) => val hashed = HashedRelation(buildIter, buildSideKeyGenerator) hashJoin(streamIter, hashed) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoin.scala index b5123668ba11e..1a39fb4b96608 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoin.scala @@ -60,7 +60,7 @@ case class SortMergeJoin( private def requiredOrders(keys: Seq[Expression]): Seq[SortOrder] = keys.map(SortOrder(_, Ascending)) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val leftResults = left.execute().map(_.copy()) val rightResults = right.execute().map(_.copy()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala index 58cb1980f217e..3dbc3837950e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala @@ -228,7 +228,7 @@ case class BatchPythonEvaluation(udf: PythonUDF, output: Seq[Attribute], 
child: def children: Seq[SparkPlan] = child :: Nil - def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val childResults = child.execute().map(_.copy()) val parent = childResults.mapPartitions { iter => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala index aded126ea0615..75ac52d4a98ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala @@ -77,7 +77,7 @@ private[sql] case class ParquetTableScan( } }.toArray - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { import parquet.filter2.compat.FilterCompat.FilterPredicateCompat val sc = sqlContext.sparkContext @@ -255,7 +255,7 @@ private[sql] case class InsertIntoParquetTable( /** * Inserts all rows into the Parquet file. */ - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { // TODO: currently we do not check whether the "schema"s are compatible // That means if one first creates a table and then INSERTs data with // and incompatible schema the execution will fail. It would be nice diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala index 0a5f19eee7105..62dc4167b78dd 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala @@ -129,7 +129,7 @@ case class HiveTableScan( } } - override def execute(): RDD[Row] = if (!relation.hiveQlTable.isPartitioned) { + protected override def doExecute(): RDD[Row] = if (!relation.hiveQlTable.isPartitioned) { hadoopReader.makeRDDForTable(relation.hiveQlTable) } else { hadoopReader.makeRDDForPartitionedTable(prunePartitions(relation.hiveQlPartitions)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index de8954d5dec99..c0b0b104e9142 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -258,5 +258,7 @@ case class InsertIntoHiveTable( override def executeCollect(): Array[Row] = sideEffectResult.toArray - override def execute(): RDD[Row] = sqlContext.sparkContext.parallelize(sideEffectResult, 1) + protected override def doExecute(): RDD[Row] = { + sqlContext.sparkContext.parallelize(sideEffectResult, 1) + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala index 3eddda3b28c66..bfd26e0170c70 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala @@ -54,7 +54,7 @@ case class ScriptTransformation( override def otherCopyArgs: Seq[HiveContext] = sc :: Nil - def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().mapPartitions { iter => val cmd = List("/bin/bash", "-c", script) val builder = new ProcessBuilder(cmd) From 
54e6fa0563ffa8788ec2fd1b8740445ef3c2ce5a Mon Sep 17 00:00:00 2001 From: tedyu <yuzhihong@gmail.com> Date: Fri, 8 May 2015 17:16:38 -0700 Subject: [PATCH 056/320] [SPARK-7237] Clean function in several RDD methods Author: tedyu <yuzhihong@gmail.com> Closes #5959 from ted-yu/master and squashes the following commits: f83d445 [tedyu] Move cleaning outside of mapPartitionsWithIndex 56d7c92 [tedyu] Consolidate import of Random f6014c0 [tedyu] Remove cleaning in RDD#filterWith 36feb6c [tedyu] Try to get correct syntax 55d01eb [tedyu] Try to get correct syntax c2786df [tedyu] Correct syntax d92bfcf [tedyu] Correct syntax in test 164d3e4 [tedyu] Correct variable name 8b50d93 [tedyu] Address Andrew's review comments 0c8d47e [tedyu] Add test for mapWith() 6846e40 [tedyu] Add test for flatMapWith() 6c124a9 [tedyu] Clean function in several RDD methods --- .../main/scala/org/apache/spark/rdd/RDD.scala | 30 ++++++++++++------- .../spark/util/ClosureCleanerSuite.scala | 21 +++++++++++++ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index b3b60578c92e8..8baf199f215fb 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -717,7 +717,8 @@ abstract class RDD[T: ClassTag]( def mapPartitionsWithContext[U: ClassTag]( f: (TaskContext, Iterator[T]) => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = withScope { - val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(context, iter) + val cleanF = sc.clean(f) + val func = (context: TaskContext, index: Int, iter: Iterator[T]) => cleanF(context, iter) new MapPartitionsRDD(this, sc.clean(func), preservesPartitioning) } @@ -741,9 +742,11 @@ abstract class RDD[T: ClassTag]( def mapWith[A, U: ClassTag] (constructA: Int => A, preservesPartitioning: Boolean = false) (f: (T, A) => U): RDD[U] = withScope { + val cleanF = sc.clean(f) + val cleanA = sc.clean(constructA) mapPartitionsWithIndex((index, iter) => { - val a = constructA(index) - iter.map(t => f(t, a)) + val a = cleanA(index) + iter.map(t => cleanF(t, a)) }, preservesPartitioning) } @@ -756,9 +759,11 @@ abstract class RDD[T: ClassTag]( def flatMapWith[A, U: ClassTag] (constructA: Int => A, preservesPartitioning: Boolean = false) (f: (T, A) => Seq[U]): RDD[U] = withScope { + val cleanF = sc.clean(f) + val cleanA = sc.clean(constructA) mapPartitionsWithIndex((index, iter) => { - val a = constructA(index) - iter.flatMap(t => f(t, a)) + val a = cleanA(index) + iter.flatMap(t => cleanF(t, a)) }, preservesPartitioning) } @@ -769,9 +774,11 @@ abstract class RDD[T: ClassTag]( */ @deprecated("use mapPartitionsWithIndex and foreach", "1.0.0") def foreachWith[A](constructA: Int => A)(f: (T, A) => Unit): Unit = withScope { + val cleanF = sc.clean(f) + val cleanA = sc.clean(constructA) mapPartitionsWithIndex { (index, iter) => - val a = constructA(index) - iter.map(t => {f(t, a); t}) + val a = cleanA(index) + iter.map(t => {cleanF(t, a); t}) } } @@ -782,9 +789,11 @@ abstract class RDD[T: ClassTag]( */ @deprecated("use mapPartitionsWithIndex and filter", "1.0.0") def filterWith[A](constructA: Int => A)(p: (T, A) => Boolean): RDD[T] = withScope { + val cleanP = sc.clean(p) + val cleanA = sc.clean(constructA) mapPartitionsWithIndex((index, iter) => { - val a = constructA(index) - iter.filter(t => p(t, a)) + val a = cleanA(index) + iter.filter(t => cleanP(t, a)) }, preservesPartitioning = true) } @@ -901,7 +910,8 
@@ abstract class RDD[T: ClassTag]( * Return an RDD that contains all matching values by applying `f`. */ def collect[U: ClassTag](f: PartialFunction[T, U]): RDD[U] = withScope { - filter(f.isDefinedAt).map(f) + val cleanF = sc.clean(f) + filter(cleanF.isDefinedAt).map(cleanF) } /** diff --git a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala index 446c3f24a74dd..e41f6ee27764e 100644 --- a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.util import java.io.NotSerializableException +import java.util.Random import org.scalatest.FunSuite @@ -92,6 +93,11 @@ class ClosureCleanerSuite extends FunSuite { expectCorrectException { TestUserClosuresActuallyCleaned.testKeyBy(rdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testMapPartitions(rdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testMapPartitionsWithIndex(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testMapPartitionsWithContext(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testFlatMapWith(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testFilterWith(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testForEachWith(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testMapWith(rdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testZipPartitions2(rdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testZipPartitions3(rdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testZipPartitions4(rdd) } @@ -260,6 +266,21 @@ private object TestUserClosuresActuallyCleaned { def testMapPartitionsWithIndex(rdd: RDD[Int]): Unit = { rdd.mapPartitionsWithIndex { (_, it) => return; it }.count() } + def testFlatMapWith(rdd: RDD[Int]): Unit = { + rdd.flatMapWith ((index: Int) => new Random(index + 42)){ (_, it) => return; Seq() }.count() + } + def testMapWith(rdd: RDD[Int]): Unit = { + rdd.mapWith ((index: Int) => new Random(index + 42)){ (_, it) => return; 0 }.count() + } + def testFilterWith(rdd: RDD[Int]): Unit = { + rdd.filterWith ((index: Int) => new Random(index + 42)){ (_, it) => return; true }.count() + } + def testForEachWith(rdd: RDD[Int]): Unit = { + rdd.foreachWith ((index: Int) => new Random(index + 42)){ (_, it) => return } + } + def testMapPartitionsWithContext(rdd: RDD[Int]): Unit = { + rdd.mapPartitionsWithContext { (_, it) => return; it }.count() + } def testZipPartitions2(rdd: RDD[Int]): Unit = { rdd.zipPartitions(rdd) { case (it1, it2) => return; it1 }.count() } From 84bf931f36edf1f319c9116f7f326959a6118991 Mon Sep 17 00:00:00 2001 From: Burak Yavuz <brkyvz@gmail.com> Date: Fri, 8 May 2015 17:24:32 -0700 Subject: [PATCH 057/320] [SPARK-7488] [ML] Feature Parity in PySpark for ml.recommendation Adds Python Api for `ALS` under `ml.recommendation` in PySpark. Also adds seed as a settable parameter in the Scala Implementation of ALS. 
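For reference, a minimal Scala-side usage sketch of the newly settable seed (illustrative only; `ratingsDF` is assumed to be a DataFrame with "user", "item" and "rating" columns):

    import org.apache.spark.ml.recommendation.ALS
    import org.apache.spark.sql.DataFrame

    def fitReproducibly(ratingsDF: DataFrame) = {
      val als = new ALS()
        .setRank(10)
        .setMaxIter(5)
        .setRegParam(0.1)
        .setSeed(42L)  // newly settable from the Scala API; defaults to 0L
      als.fit(ratingsDF)
    }

The PySpark `ALS` added in this patch exposes the same parameters, including `seed`.
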
Author: Burak Yavuz <brkyvz@gmail.com> Closes #6015 from brkyvz/ml-rec and squashes the following commits: be6e931 [Burak Yavuz] addressed comments eaed879 [Burak Yavuz] readd numFeatures 0bd66b1 [Burak Yavuz] fixed seed 7f6d964 [Burak Yavuz] merged master 52e2bda [Burak Yavuz] added ALS --- .../apache/spark/ml/recommendation/ALS.scala | 12 +- .../ml/param/_shared_params_code_gen.py | 2 + python/pyspark/ml/param/shared.py | 29 ++ python/pyspark/ml/recommendation.py | 279 ++++++++++++++++++ 4 files changed, 318 insertions(+), 4 deletions(-) create mode 100644 python/pyspark/ml/recommendation.py diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 6cf4b40075281..d7cbffc3be26f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -49,7 +49,7 @@ import org.apache.spark.util.random.XORShiftRandom * Common params for ALS. */ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasRegParam - with HasPredictionCol with HasCheckpointInterval { + with HasPredictionCol with HasCheckpointInterval with HasSeed { /** * Param for rank of the matrix factorization (>= 1). @@ -147,7 +147,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR setDefault(rank -> 10, maxIter -> 10, regParam -> 0.1, numUserBlocks -> 10, numItemBlocks -> 10, implicitPrefs -> false, alpha -> 1.0, userCol -> "user", itemCol -> "item", - ratingCol -> "rating", nonnegative -> false, checkpointInterval -> 10) + ratingCol -> "rating", nonnegative -> false, checkpointInterval -> 10, seed -> 0L) /** * Validates and transforms the input schema. @@ -278,6 +278,9 @@ class ALS extends Estimator[ALSModel] with ALSParams { /** @group setParam */ def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value) + /** @group setParam */ + def setSeed(value: Long): this.type = set(seed, value) + /** * Sets both numUserBlocks and numItemBlocks to the specific value. 
* @group setParam @@ -290,7 +293,8 @@ class ALS extends Estimator[ALSModel] with ALSParams { override def fit(dataset: DataFrame): ALSModel = { val ratings = dataset - .select(col($(userCol)), col($(itemCol)), col($(ratingCol)).cast(FloatType)) + .select(col($(userCol)).cast(IntegerType), col($(itemCol)).cast(IntegerType), + col($(ratingCol)).cast(FloatType)) .map { row => Rating(row.getInt(0), row.getInt(1), row.getFloat(2)) } @@ -298,7 +302,7 @@ class ALS extends Estimator[ALSModel] with ALSParams { numUserBlocks = $(numUserBlocks), numItemBlocks = $(numItemBlocks), maxIter = $(maxIter), regParam = $(regParam), implicitPrefs = $(implicitPrefs), alpha = $(alpha), nonnegative = $(nonnegative), - checkpointInterval = $(checkpointInterval)) + checkpointInterval = $(checkpointInterval), seed = $(seed)) copyValues(new ALSModel(this, $(rank), userFactors, itemFactors)) } diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index ee901f25847bc..ed3171b6976d3 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -97,6 +97,8 @@ def get$Name(self): ("inputCol", "input column name", None), ("inputCols", "input column names", None), ("outputCol", "output column name", None), + ("numFeatures", "number of features", None), + ("checkpointInterval", "checkpoint interval (>= 1)", None), ("seed", "random seed", None), ("tol", "the convergence tolerance for iterative algorithms", None), ("stepSize", "Step size to be used for each iteration of optimization.", None)] diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 5e7529c1dcf1e..d0bcadee22347 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -310,6 +310,35 @@ def getNumFeatures(self): return self.getOrDefault(self.numFeatures) +class HasCheckpointInterval(Params): + """ + Mixin for param checkpointInterval: checkpoint interval (>= 1). + """ + + # a placeholder to make it appear in the generated doc + checkpointInterval = Param(Params._dummy(), "checkpointInterval", "checkpoint interval (>= 1)") + + def __init__(self): + super(HasCheckpointInterval, self).__init__() + #: param for checkpoint interval (>= 1) + self.checkpointInterval = Param(self, "checkpointInterval", "checkpoint interval (>= 1)") + if None is not None: + self._setDefault(checkpointInterval=None) + + def setCheckpointInterval(self, value): + """ + Sets the value of :py:attr:`checkpointInterval`. + """ + self.paramMap[self.checkpointInterval] = value + return self + + def getCheckpointInterval(self): + """ + Gets the value of checkpointInterval or its default value. + """ + return self.getOrDefault(self.checkpointInterval) + + class HasSeed(Params): """ Mixin for param seed: random seed. diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py new file mode 100644 index 0000000000000..4846b907e85ec --- /dev/null +++ b/python/pyspark/ml/recommendation.py @@ -0,0 +1,279 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.ml.util import keyword_only +from pyspark.ml.wrapper import JavaEstimator, JavaModel +from pyspark.ml.param.shared import * +from pyspark.mllib.common import inherit_doc + + +__all__ = ['ALS', 'ALSModel'] + + +@inherit_doc +class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, HasRegParam, HasSeed): + """ + Alternating Least Squares (ALS) matrix factorization. + + ALS attempts to estimate the ratings matrix `R` as the product of + two lower-rank matrices, `X` and `Y`, i.e. `X * Yt = R`. Typically + these approximations are called 'factor' matrices. The general + approach is iterative. During each iteration, one of the factor + matrices is held constant, while the other is solved for using least + squares. The newly-solved factor matrix is then held constant while + solving for the other factor matrix. + + This is a blocked implementation of the ALS factorization algorithm + that groups the two sets of factors (referred to as "users" and + "products") into blocks and reduces communication by only sending + one copy of each user vector to each product block on each + iteration, and only for the product blocks that need that user's + feature vector. This is achieved by pre-computing some information + about the ratings matrix to determine the "out-links" of each user + (which blocks of products it will contribute to) and "in-link" + information for each product (which of the feature vectors it + receives from each user block it will depend on). This allows us to + send only an array of feature vectors between each user block and + product block, and have the product block find the users' ratings + and update the products based on these messages. + + For implicit preference data, the algorithm used is based on + "Collaborative Filtering for Implicit Feedback Datasets", available + at `http://dx.doi.org/10.1109/ICDM.2008.22`, adapted for the blocked + approach used here. + + Essentially instead of finding the low-rank approximations to the + rating matrix `R`, this finds the approximations for a preference + matrix `P` where the elements of `P` are 1 if r > 0 and 0 if r <= 0. + The ratings then act as 'confidence' values related to strength of + indicated user preferences rather than explicit ratings given to + items. + + >>> als = ALS(rank=10, maxIter=5) + >>> model = als.fit(df) + >>> test = sqlContext.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", "item"]) + >>> predictions = sorted(model.transform(test).collect(), key=lambda r: r[0]) + >>> predictions[0] + Row(user=0, item=2, prediction=0.39...) + >>> predictions[1] + Row(user=1, item=0, prediction=3.19...) + >>> predictions[2] + Row(user=2, item=0, prediction=-1.15...) 
+ """ + _java_class = "org.apache.spark.ml.recommendation.ALS" + # a placeholder to make it appear in the generated doc + rank = Param(Params._dummy(), "rank", "rank of the factorization") + numUserBlocks = Param(Params._dummy(), "numUserBlocks", "number of user blocks") + numItemBlocks = Param(Params._dummy(), "numItemBlocks", "number of item blocks") + implicitPrefs = Param(Params._dummy(), "implicitPrefs", "whether to use implicit preference") + alpha = Param(Params._dummy(), "alpha", "alpha for implicit preference") + userCol = Param(Params._dummy(), "userCol", "column name for user ids") + itemCol = Param(Params._dummy(), "itemCol", "column name for item ids") + ratingCol = Param(Params._dummy(), "ratingCol", "column name for ratings") + nonnegative = Param(Params._dummy(), "nonnegative", + "whether to use nonnegative constraint for least squares") + + @keyword_only + def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, + ratingCol="rating", nonnegative=False, checkpointInterval=10): + """ + __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, + implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0, + ratingCol="rating", nonnegative=false, checkpointInterval=10) + """ + super(ALS, self).__init__() + self.rank = Param(self, "rank", "rank of the factorization") + self.numUserBlocks = Param(self, "numUserBlocks", "number of user blocks") + self.numItemBlocks = Param(self, "numItemBlocks", "number of item blocks") + self.implicitPrefs = Param(self, "implicitPrefs", "whether to use implicit preference") + self.alpha = Param(self, "alpha", "alpha for implicit preference") + self.userCol = Param(self, "userCol", "column name for user ids") + self.itemCol = Param(self, "itemCol", "column name for item ids") + self.ratingCol = Param(self, "ratingCol", "column name for ratings") + self.nonnegative = Param(self, "nonnegative", + "whether to use nonnegative constraint for least squares") + self._setDefault(rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, + ratingCol="rating", nonnegative=False, checkpointInterval=10) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, + ratingCol="rating", nonnegative=False, checkpointInterval=10): + """ + setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, + ratingCol="rating", nonnegative=False, checkpointInterval=10) + Sets params for ALS. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return ALSModel(java_model) + + def setRank(self, value): + """ + Sets the value of :py:attr:`rank`. + """ + self.paramMap[self.rank] = value + return self + + def getRank(self): + """ + Gets the value of rank or its default value. + """ + return self.getOrDefault(self.rank) + + def setNumUserBlocks(self, value): + """ + Sets the value of :py:attr:`numUserBlocks`. + """ + self.paramMap[self.numUserBlocks] = value + return self + + def getNumUserBlocks(self): + """ + Gets the value of numUserBlocks or its default value. 
+ """ + return self.getOrDefault(self.numUserBlocks) + + def setNumItemBlocks(self, value): + """ + Sets the value of :py:attr:`numItemBlocks`. + """ + self.paramMap[self.numItemBlocks] = value + return self + + def getNumItemBlocks(self): + """ + Gets the value of numItemBlocks or its default value. + """ + return self.getOrDefault(self.numItemBlocks) + + def setNumBlocks(self, value): + """ + Sets both :py:attr:`numUserBlocks` and :py:attr:`numItemBlocks` to the specific value. + """ + self.paramMap[self.numUserBlocks] = value + self.paramMap[self.numItemBlocks] = value + + def setImplicitPrefs(self, value): + """ + Sets the value of :py:attr:`implicitPrefs`. + """ + self.paramMap[self.implicitPrefs] = value + return self + + def getImplicitPrefs(self): + """ + Gets the value of implicitPrefs or its default value. + """ + return self.getOrDefault(self.implicitPrefs) + + def setAlpha(self, value): + """ + Sets the value of :py:attr:`alpha`. + """ + self.paramMap[self.alpha] = value + return self + + def getAlpha(self): + """ + Gets the value of alpha or its default value. + """ + return self.getOrDefault(self.alpha) + + def setUserCol(self, value): + """ + Sets the value of :py:attr:`userCol`. + """ + self.paramMap[self.userCol] = value + return self + + def getUserCol(self): + """ + Gets the value of userCol or its default value. + """ + return self.getOrDefault(self.userCol) + + def setItemCol(self, value): + """ + Sets the value of :py:attr:`itemCol`. + """ + self.paramMap[self.itemCol] = value + return self + + def getItemCol(self): + """ + Gets the value of itemCol or its default value. + """ + return self.getOrDefault(self.itemCol) + + def setRatingCol(self, value): + """ + Sets the value of :py:attr:`ratingCol`. + """ + self.paramMap[self.ratingCol] = value + return self + + def getRatingCol(self): + """ + Gets the value of ratingCol or its default value. + """ + return self.getOrDefault(self.ratingCol) + + def setNonnegative(self, value): + """ + Sets the value of :py:attr:`nonnegative`. + """ + self.paramMap[self.nonnegative] = value + return self + + def getNonnegative(self): + """ + Gets the value of nonnegative or its default value. + """ + return self.getOrDefault(self.nonnegative) + + +class ALSModel(JavaModel): + """ + Model fitted by ALS. + """ + + +if __name__ == "__main__": + import doctest + from pyspark.context import SparkContext + from pyspark.sql import SQLContext + globs = globals().copy() + # The small batch size here ensures that we see multiple batches, + # even in these small test examples: + sc = SparkContext("local[2]", "ml.recommendation tests") + sqlContext = SQLContext(sc) + globs['sc'] = sc + globs['sqlContext'] = sqlContext + globs['df'] = sqlContext.createDataFrame([(0, 0, 4.0), (0, 1, 2.0), (1, 1, 3.0), (1, 2, 4.0), + (2, 1, 1.0), (2, 2, 5.0)], ["user", "item", "rating"]) + (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + sc.stop() + if failure_count: + exit(-1) From b6c797b08cbd08d7aab59ad0106af0f5f41ef186 Mon Sep 17 00:00:00 2001 From: Ashwin Shankar <ashankar@netflix.com> Date: Fri, 8 May 2015 17:51:00 -0700 Subject: [PATCH 058/320] [SPARK-7451] [YARN] Preemption of executors is counted as failure causing Spark job to fail Added a check to handle container exit status for the preemption scenario, log an INFO message in such cases and move on. 
andrewor14 Author: Ashwin Shankar <ashankar@netflix.com> Closes #5993 from ashwinshankar77/SPARK-7451 and squashes the following commits: 90900cf [Ashwin Shankar] Fix log info message cf8b6cf [Ashwin Shankar] Stop counting preemption of executors as failure --- .../scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index b8f42dadcb464..88d68d5556162 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -373,7 +373,9 @@ private[yarn] class YarnAllocator( // Hadoop 2.2.X added a ContainerExitStatus we should switch to use // there are some exit status' we shouldn't necessarily count against us, but for // now I think its ok as none of the containers are expected to exit - if (completedContainer.getExitStatus == -103) { // vmem limit exceeded + if (completedContainer.getExitStatus == ContainerExitStatus.PREEMPTED) { + logInfo("Container preempted: " + containerId) + } else if (completedContainer.getExitStatus == -103) { // vmem limit exceeded logWarning(memLimitExceededLogMessage( completedContainer.getDiagnostics, VMEM_EXCEEDED_PATTERN)) From 0a901dd3a1eb3fd459d45b771ce4ad2cfef2a944 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Date: Fri, 8 May 2015 18:29:57 -0700 Subject: [PATCH 059/320] [SPARK-7231] [SPARKR] Changes to make SparkR DataFrame dplyr friendly. Changes include 1. Rename sortDF to arrange 2. Add new aliases `group_by` and `sample_frac`, `summarize` 3. Add more user friendly column addition (mutate), rename 4. Support mean as an alias for avg in Scala and also support n_distinct, n as in dplyr Using these changes we can pretty much run the examples as described in http://cran.rstudio.com/web/packages/dplyr/vignettes/introduction.html with the same syntax The only thing missing in SparkR is auto resolving column names when used in an expression i.e. making something like `select(flights, delay)` works in dply but we right now need `select(flights, flights$delay)` or `select(flights, "delay")`. But this is a complicated change and I'll file a new issue for it cc sun-rui rxin Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Closes #6005 from shivaram/sparkr-df-api and squashes the following commits: 5e0716a [Shivaram Venkataraman] Fix some roxygen bugs 1254953 [Shivaram Venkataraman] Merge branch 'master' of https://github.com/apache/spark into sparkr-df-api 0521149 [Shivaram Venkataraman] Changes to make SparkR DataFrame dplyr friendly. Changes include 1. Rename sortDF to arrange 2. Add new aliases `group_by` and `sample_frac`, `summarize` 3. Add more user friendly column addition (mutate), rename 4. 
Support mean as an alias for avg in Scala and also support n_distinct, n as in dplyr --- R/pkg/NAMESPACE | 11 +- R/pkg/R/DataFrame.R | 127 ++++++++++++++++-- R/pkg/R/column.R | 32 ++++- R/pkg/R/generics.R | 41 +++++- R/pkg/R/group.R | 10 +- R/pkg/inst/tests/test_sparkSQL.R | 36 ++++- .../org/apache/spark/sql/functions.scala | 16 +++ .../org/apache/spark/sql/DataFrameSuite.scala | 5 + 8 files changed, 249 insertions(+), 29 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 7611f479a628b..819e9a24e5c0e 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -9,7 +9,8 @@ export("print.jobj") exportClasses("DataFrame") -exportMethods("cache", +exportMethods("arrange", + "cache", "collect", "columns", "count", @@ -20,6 +21,7 @@ exportMethods("cache", "explain", "filter", "first", + "group_by", "groupBy", "head", "insertInto", @@ -28,12 +30,15 @@ exportMethods("cache", "join", "limit", "orderBy", + "mutate", "names", "persist", "printSchema", "registerTempTable", + "rename", "repartition", "sampleDF", + "sample_frac", "saveAsParquetFile", "saveAsTable", "saveDF", @@ -42,7 +47,7 @@ exportMethods("cache", "selectExpr", "show", "showDF", - "sortDF", + "summarize", "take", "unionAll", "unpersist", @@ -72,6 +77,8 @@ exportMethods("abs", "max", "mean", "min", + "n", + "n_distinct", "rlike", "sqrt", "startsWith", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 354642e7bc307..8a9d2dd45c588 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -480,6 +480,7 @@ setMethod("distinct", #' @param withReplacement Sampling with replacement or not #' @param fraction The (rough) sample target fraction #' @rdname sampleDF +#' @aliases sample_frac #' @export #' @examples #'\dontrun{ @@ -501,6 +502,15 @@ setMethod("sampleDF", dataFrame(sdf) }) +#' @rdname sampleDF +#' @aliases sampleDF +setMethod("sample_frac", + signature(x = "DataFrame", withReplacement = "logical", + fraction = "numeric"), + function(x, withReplacement, fraction) { + sampleDF(x, withReplacement, fraction) + }) + #' Count #' #' Returns the number of rows in a DataFrame @@ -682,7 +692,8 @@ setMethod("toRDD", #' @param x a DataFrame #' @return a GroupedData #' @seealso GroupedData -#' @rdname DataFrame +#' @aliases group_by +#' @rdname groupBy #' @export #' @examples #' \dontrun{ @@ -705,12 +716,21 @@ setMethod("groupBy", groupedData(sgd) }) -#' Agg +#' @rdname groupBy +#' @aliases group_by +setMethod("group_by", + signature(x = "DataFrame"), + function(x, ...) { + groupBy(x, ...) + }) + +#' Summarize data across columns #' #' Compute aggregates by specifying a list of columns #' #' @param x a DataFrame #' @rdname DataFrame +#' @aliases summarize #' @export setMethod("agg", signature(x = "DataFrame"), @@ -718,6 +738,14 @@ setMethod("agg", agg(groupBy(x), ...) }) +#' @rdname DataFrame +#' @aliases agg +setMethod("summarize", + signature(x = "DataFrame"), + function(x, ...) { + agg(x, ...) + }) + ############################## RDD Map Functions ################################## # All of the following functions mirror the existing RDD map functions, # @@ -886,7 +914,7 @@ setMethod("select", signature(x = "DataFrame", col = "list"), function(x, col) { cols <- lapply(col, function(c) { - if (class(c)== "Column") { + if (class(c) == "Column") { c@jc } else { col(c)@jc @@ -946,6 +974,42 @@ setMethod("withColumn", select(x, x$"*", alias(col, colName)) }) +#' Mutate +#' +#' Return a new DataFrame with the specified columns added. 
+#' +#' @param x A DataFrame +#' @param col a named argument of the form name = col +#' @return A new DataFrame with the new columns added. +#' @rdname withColumn +#' @aliases withColumn +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlCtx <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- jsonFile(sqlCtx, path) +#' newDF <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2) +#' names(newDF) # Will contain newCol, newCol2 +#' } +setMethod("mutate", + signature(x = "DataFrame"), + function(x, ...) { + cols <- list(...) + stopifnot(length(cols) > 0) + stopifnot(class(cols[[1]]) == "Column") + ns <- names(cols) + if (!is.null(ns)) { + for (n in ns) { + if (n != "") { + cols[[n]] <- alias(cols[[n]], n) + } + } + } + do.call(select, c(x, x$"*", cols)) + }) + #' WithColumnRenamed #' #' Rename an existing column in a DataFrame. @@ -977,9 +1041,47 @@ setMethod("withColumnRenamed", select(x, cols) }) +#' Rename +#' +#' Rename an existing column in a DataFrame. +#' +#' @param x A DataFrame +#' @param newCol A named pair of the form new_column_name = existing_column +#' @return A DataFrame with the column name changed. +#' @rdname withColumnRenamed +#' @aliases withColumnRenamed +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlCtx <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- jsonFile(sqlCtx, path) +#' newDF <- rename(df, col1 = df$newCol1) +#' } +setMethod("rename", + signature(x = "DataFrame"), + function(x, ...) { + renameCols <- list(...) + stopifnot(length(renameCols) > 0) + stopifnot(class(renameCols[[1]]) == "Column") + newNames <- names(renameCols) + oldNames <- lapply(renameCols, function(col) { + callJMethod(col@jc, "toString") + }) + cols <- lapply(columns(x), function(c) { + if (c %in% oldNames) { + alias(col(c), newNames[[match(c, oldNames)]]) + } else { + col(c) + } + }) + select(x, cols) + }) + setClassUnion("characterOrColumn", c("character", "Column")) -#' SortDF +#' Arrange #' #' Sort a DataFrame by the specified column(s). #' @@ -987,7 +1089,7 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' @param col Either a Column object or character vector indicating the field to sort on #' @param ... Additional sorting fields #' @return A DataFrame where all elements are sorted. -#' @rdname sortDF +#' @rdname arrange #' @export #' @examples #'\dontrun{ @@ -995,11 +1097,11 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' sqlCtx <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- jsonFile(sqlCtx, path) -#' sortDF(df, df$col1) -#' sortDF(df, "col1") -#' sortDF(df, asc(df$col1), desc(abs(df$col2))) +#' arrange(df, df$col1) +#' arrange(df, "col1") +#' arrange(df, asc(df$col1), desc(abs(df$col2))) #' } -setMethod("sortDF", +setMethod("arrange", signature(x = "DataFrame", col = "characterOrColumn"), function(x, col, ...) { if (class(col) == "character") { @@ -1013,12 +1115,12 @@ setMethod("sortDF", dataFrame(sdf) }) -#' @rdname sortDF +#' @rdname arrange #' @aliases orderBy,DataFrame,function-method setMethod("orderBy", signature(x = "DataFrame", col = "characterOrColumn"), function(x, col) { - sortDF(x, col) + arrange(x, col) }) #' Filter @@ -1026,7 +1128,7 @@ setMethod("orderBy", #' Filter the rows of a DataFrame according to a given condition. #' #' @param x A DataFrame to be sorted. -#' @param condition The condition to sort on. This may either be a Column expression +#' @param condition The condition to filter on. 
This may either be a Column expression #' or a string containing a SQL statement #' @return A DataFrame containing only the rows that meet the condition. #' @rdname filter @@ -1106,6 +1208,7 @@ setMethod("join", #' #' Return a new DataFrame containing the union of rows in this DataFrame #' and another DataFrame. This is equivalent to `UNION ALL` in SQL. +#' Note that this does not remove duplicate rows across the two DataFrames. #' #' @param x A Spark DataFrame #' @param y A Spark DataFrame diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 95fb9ff0887b6..9a68445ab451a 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -131,6 +131,8 @@ createMethods() #' alias #' #' Set a new name for a column + +#' @rdname column setMethod("alias", signature(object = "Column"), function(object, data) { @@ -141,8 +143,12 @@ setMethod("alias", } }) +#' substr +#' #' An expression that returns a substring. #' +#' @rdname column +#' #' @param start starting position #' @param stop ending position setMethod("substr", signature(x = "Column"), @@ -152,6 +158,9 @@ setMethod("substr", signature(x = "Column"), }) #' Casts the column to a different data type. +#' +#' @rdname column +#' #' @examples #' \dontrun{ #' cast(df$age, "string") @@ -173,8 +182,8 @@ setMethod("cast", #' Approx Count Distinct #' -#' Returns the approximate number of distinct items in a group. -#' +#' @rdname column +#' @return the approximate number of distinct items in a group. setMethod("approxCountDistinct", signature(x = "Column"), function(x, rsd = 0.95) { @@ -184,8 +193,8 @@ setMethod("approxCountDistinct", #' Count Distinct #' -#' returns the number of distinct items in a group. -#' +#' @rdname column +#' @return the number of distinct items in a group. setMethod("countDistinct", signature(x = "Column"), function(x, ...) { @@ -197,3 +206,18 @@ setMethod("countDistinct", column(jc) }) +#' @rdname column +#' @aliases countDistinct +setMethod("n_distinct", + signature(x = "Column"), + function(x, ...) { + countDistinct(x, ...) + }) + +#' @rdname column +#' @aliases count +setMethod("n", + signature(x = "Column"), + function(x) { + count(x) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 380e8ebe8c8f4..557128a419f19 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -380,6 +380,14 @@ setGeneric("value", function(bcast) { standardGeneric("value") }) #################### DataFrame Methods ######################## +#' @rdname agg +#' @export +setGeneric("agg", function (x, ...) { standardGeneric("agg") }) + +#' @rdname arrange +#' @export +setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") }) + #' @rdname schema #' @export setGeneric("columns", function(x) {standardGeneric("columns") }) @@ -404,6 +412,10 @@ setGeneric("except", function(x, y) { standardGeneric("except") }) #' @export setGeneric("filter", function(x, condition) { standardGeneric("filter") }) +#' @rdname groupBy +#' @export +setGeneric("group_by", function(x, ...) { standardGeneric("group_by") }) + #' @rdname DataFrame #' @export setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") }) @@ -424,7 +436,11 @@ setGeneric("isLocal", function(x) { standardGeneric("isLocal") }) #' @export setGeneric("limit", function(x, num) {standardGeneric("limit") }) -#' @rdname sortDF +#' @rdname withColumn +#' @export +setGeneric("mutate", function(x, ...) 
{standardGeneric("mutate") }) + +#' @rdname arrange #' @export setGeneric("orderBy", function(x, col) { standardGeneric("orderBy") }) @@ -432,10 +448,21 @@ setGeneric("orderBy", function(x, col) { standardGeneric("orderBy") }) #' @export setGeneric("printSchema", function(x) { standardGeneric("printSchema") }) +#' @rdname withColumnRenamed +#' @export +setGeneric("rename", function(x, ...) { standardGeneric("rename") }) + #' @rdname registerTempTable #' @export setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") }) +#' @rdname sampleDF +#' @export +setGeneric("sample_frac", + function(x, withReplacement, fraction, seed) { + standardGeneric("sample_frac") + }) + #' @rdname sampleDF #' @export setGeneric("sampleDF", @@ -473,9 +500,9 @@ setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr") #' @export setGeneric("showDF", function(x,...) { standardGeneric("showDF") }) -#' @rdname sortDF +#' @rdname agg #' @export -setGeneric("sortDF", function(x, col, ...) { standardGeneric("sortDF") }) +setGeneric("summarize", function(x,...) { standardGeneric("summarize") }) # @rdname tojson # @export @@ -564,6 +591,14 @@ setGeneric("like", function(x, ...) { standardGeneric("like") }) #' @export setGeneric("lower", function(x) { standardGeneric("lower") }) +#' @rdname column +#' @export +setGeneric("n", function(x) { standardGeneric("n") }) + +#' @rdname column +#' @export +setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") }) + #' @rdname column #' @export setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 02237b3672d6b..5a7a8a2caba13 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -56,6 +56,7 @@ setMethod("show", "GroupedData", #' #' @param x a GroupedData #' @return a DataFrame +#' @rdname agg #' @export #' @examples #' \dontrun{ @@ -83,8 +84,6 @@ setMethod("count", #' df2 <- agg(df, age = "sum") # new column name will be created as 'SUM(age#0)' #' df2 <- agg(df, ageSum = sum(df$age)) # Creates a new column named ageSum #' } -setGeneric("agg", function (x, ...) { standardGeneric("agg") }) - setMethod("agg", signature(x = "GroupedData"), function(x, ...) { @@ -112,6 +111,13 @@ setMethod("agg", dataFrame(sdf) }) +#' @rdname agg +#' @aliases agg +setMethod("summarize", + signature(x = "GroupedData"), + function(x, ...) { + agg(x, ...) 
+ }) # sum/mean/avg/min/max methods <- c("sum", "mean", "avg", "min", "max") diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 7a42e289fcd9e..dbb535e245321 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -428,6 +428,10 @@ test_that("sampleDF on a DataFrame", { expect_true(inherits(sampled, "DataFrame")) sampled2 <- sampleDF(df, FALSE, 0.1) expect_true(count(sampled2) < 3) + + # Also test sample_frac + sampled3 <- sample_frac(df, FALSE, 0.1) + expect_true(count(sampled3) < 3) }) test_that("select operators", { @@ -533,6 +537,7 @@ test_that("column functions", { c2 <- min(c) + max(c) + sum(c) + avg(c) + count(c) + abs(c) + sqrt(c) c3 <- lower(c) + upper(c) + first(c) + last(c) c4 <- approxCountDistinct(c) + countDistinct(c) + cast(c, "string") + c5 <- n(c) + n_distinct(c) }) test_that("string operators", { @@ -557,6 +562,13 @@ test_that("group by", { expect_true(inherits(df2, "DataFrame")) expect_true(3 == count(df2)) + # Also test group_by, summarize, mean + gd1 <- group_by(df, "name") + expect_true(inherits(gd1, "GroupedData")) + df_summarized <- summarize(gd, mean_age = mean(df$age)) + expect_true(inherits(df_summarized, "DataFrame")) + expect_true(3 == count(df_summarized)) + df3 <- agg(gd, age = "sum") expect_true(inherits(df3, "DataFrame")) expect_true(3 == count(df3)) @@ -573,12 +585,12 @@ test_that("group by", { expect_true(3 == count(max(gd, "age"))) }) -test_that("sortDF() and orderBy() on a DataFrame", { +test_that("arrange() and orderBy() on a DataFrame", { df <- jsonFile(sqlCtx, jsonPath) - sorted <- sortDF(df, df$age) + sorted <- arrange(df, df$age) expect_true(collect(sorted)[1,2] == "Michael") - sorted2 <- sortDF(df, "name") + sorted2 <- arrange(df, "name") expect_true(collect(sorted2)[2,"age"] == 19) sorted3 <- orderBy(df, asc(df$age)) @@ -659,17 +671,17 @@ test_that("unionAll(), except(), and intersect() on a DataFrame", { writeLines(lines, jsonPath2) df2 <- loadDF(sqlCtx, jsonPath2, "json") - unioned <- sortDF(unionAll(df, df2), df$age) + unioned <- arrange(unionAll(df, df2), df$age) expect_true(inherits(unioned, "DataFrame")) expect_true(count(unioned) == 6) expect_true(first(unioned)$name == "Michael") - excepted <- sortDF(except(df, df2), desc(df$age)) + excepted <- arrange(except(df, df2), desc(df$age)) expect_true(inherits(unioned, "DataFrame")) expect_true(count(excepted) == 2) expect_true(first(excepted)$name == "Justin") - intersected <- sortDF(intersect(df, df2), df$age) + intersected <- arrange(intersect(df, df2), df$age) expect_true(inherits(unioned, "DataFrame")) expect_true(count(intersected) == 1) expect_true(first(intersected)$name == "Andy") @@ -687,6 +699,18 @@ test_that("withColumn() and withColumnRenamed()", { expect_true(columns(newDF2)[1] == "newerAge") }) +test_that("mutate() and rename()", { + df <- jsonFile(sqlCtx, jsonPath) + newDF <- mutate(df, newAge = df$age + 2) + expect_true(length(columns(newDF)) == 3) + expect_true(columns(newDF)[3] == "newAge") + expect_true(first(filter(newDF, df$name != "Michael"))$newAge == 32) + + newDF2 <- rename(df, newerAge = df$age) + expect_true(length(columns(newDF2)) == 2) + expect_true(columns(newDF2)[1] == "newerAge") +}) + test_that("saveDF() on DataFrame and works with parquetFile", { df <- jsonFile(sqlCtx, jsonPath) saveDF(df, parquetPath, "parquet", mode="overwrite") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 
1728b0b8c910e..fae4bd0fd2994 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -246,6 +246,22 @@ object functions { */ def last(columnName: String): Column = last(Column(columnName)) + /** + * Aggregate function: returns the average of the values in a group. + * Alias for avg. + * + * @group agg_funcs + */ + def mean(e: Column): Column = avg(e) + + /** + * Aggregate function: returns the average of the values in a group. + * Alias for avg. + * + * @group agg_funcs + */ + def mean(columnName: String): Column = avg(columnName) + /** * Aggregate function: returns the minimum value of the expression in a group. * diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index d2ca8dccae574..cf590cbd5219c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -308,6 +308,11 @@ class DataFrameSuite extends QueryTest { testData2.agg(avg('a)), Row(2.0)) + // Also check mean + checkAnswer( + testData2.agg(mean('a)), + Row(2.0)) + checkAnswer( testData2.agg(avg('a), sumDistinct('a)), // non-partial Row(2.0, 6.0) :: Nil) From cde5483884068b0ae1470b9b9b3ee54ab944ab12 Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Fri, 8 May 2015 22:09:55 -0400 Subject: [PATCH 060/320] [SPARK-7375] [SQL] Avoid row copying in exchange when sort.serializeMapOutputs takes effect This patch refactors the SQL `Exchange` operator's logic for determining whether map outputs need to be copied before being shuffled. As part of this change, we'll now avoid unnecessary copies in cases where sort-based shuffle operates on serialized map outputs (as in #4450 / SPARK-4550). This patch also includes a change to copy the input to RangePartitioner partition bounds calculation, which is necessary because this calculation buffers mutable Java objects. <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/5948) <!-- Reviewable:end --> Author: Josh Rosen <joshrosen@databricks.com> Closes #5948 from JoshRosen/SPARK-7375 and squashes the following commits: f305ff3 [Josh Rosen] Reduce scope of some variables in Exchange 899e1d7 [Josh Rosen] Merge remote-tracking branch 'origin/master' into SPARK-7375 6a6bfce [Josh Rosen] Fix issue related to RangePartitioning: ad006a4 [Josh Rosen] [SPARK-7375] Avoid defensive copying in exchange operator when sort.serializeMapOutputs takes effect. 
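For readers who have not run into this before, the reason the defensive copies exist at all: SQL operators hand out the same mutable row object from their iterators, while several shuffle code paths (the in-memory sorter used by sort-based shuffle, and RangePartitioner's sampling of keys for partition bounds) buffer those records as plain Java objects. The self-contained Scala sketch below uses a made-up MutableRow class rather than Spark's Row and is only meant to show why buffering reused objects without a copy corrupts the result; the patch's point is that the copy can be skipped exactly in the paths that serialize or hash records immediately instead of buffering them.

```scala
// Standalone illustration, not Spark code: why reused mutable rows must be copied
// before anything downstream buffers them.
object MutableRowBufferingSketch {
  final class MutableRow(var value: Int)

  // Like many SQL operators, this iterator returns the *same* row object every time,
  // mutating it in place for each record.
  def reusingIterator(values: Seq[Int]): Iterator[MutableRow] = {
    val row = new MutableRow(0)
    values.iterator.map { v => row.value = v; row }
  }

  def main(args: Array[String]): Unit = {
    // Buffering without copying: every buffered slot aliases the single row object,
    // so all entries end up holding the last value written.
    val corrupted = reusingIterator(Seq(1, 2, 3)).toArray
    println(corrupted.map(_.value).mkString(","))   // prints 3,3,3

    // A defensive copy per record preserves the values, at the cost of an allocation each.
    val copied = reusingIterator(Seq(1, 2, 3)).map(r => new MutableRow(r.value)).toArray
    println(copied.map(_.value).mkString(","))      // prints 1,2,3
  }
}
```

Read this way, the needToCopyObjectsBeforeShuffle helper introduced in the diff below is just a centralized statement of when some downstream consumer will buffer records as objects.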
--- .../apache/spark/sql/execution/Exchange.scala | 156 +++++++++++------- 1 file changed, 100 insertions(+), 56 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala index f02fa81e95e84..c3d2c7019a54a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi import org.apache.spark.shuffle.sort.SortShuffleManager -import org.apache.spark.{SparkEnv, HashPartitioner, RangePartitioner} +import org.apache.spark.{HashPartitioner, Partitioner, RangePartitioner, SparkEnv} import org.apache.spark.rdd.{RDD, ShuffledRDD} import org.apache.spark.serializer.Serializer import org.apache.spark.sql.{SQLContext, Row} @@ -59,11 +59,62 @@ case class Exchange( override def output: Seq[Attribute] = child.output - /** We must copy rows when sort based shuffle is on */ - protected def sortBasedShuffleOn = SparkEnv.get.shuffleManager.isInstanceOf[SortShuffleManager] - - private val bypassMergeThreshold = - child.sqlContext.sparkContext.conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + /** + * Determines whether records must be defensively copied before being sent to the shuffle. + * Several of Spark's shuffle components will buffer deserialized Java objects in memory. The + * shuffle code assumes that objects are immutable and hence does not perform its own defensive + * copying. In Spark SQL, however, operators' iterators return the same mutable `Row` object. In + * order to properly shuffle the output of these operators, we need to perform our own copying + * prior to sending records to the shuffle. This copying is expensive, so we try to avoid it + * whenever possible. This method encapsulates the logic for choosing when to copy. + * + * In the long run, we might want to push this logic into core's shuffle APIs so that we don't + * have to rely on knowledge of core internals here in SQL. + * + * See SPARK-2967, SPARK-4479, and SPARK-7375 for more discussion of this issue. + * + * @param partitioner the partitioner for the shuffle + * @param serializer the serializer that will be used to write rows + * @return true if rows should be copied before being shuffled, false otherwise + */ + private def needToCopyObjectsBeforeShuffle( + partitioner: Partitioner, + serializer: Serializer): Boolean = { + // Note: even though we only use the partitioner's `numPartitions` field, we require it to be + // passed instead of directly passing the number of partitions in order to guard against + // corner-cases where a partitioner constructed with `numPartitions` partitions may output + // fewer partitions (like RangePartitioner, for example). + val conf = child.sqlContext.sparkContext.conf + val sortBasedShuffleOn = SparkEnv.get.shuffleManager.isInstanceOf[SortShuffleManager] + val bypassMergeThreshold = conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + val serializeMapOutputs = conf.getBoolean("spark.shuffle.sort.serializeMapOutputs", true) + if (newOrdering.nonEmpty) { + // If a new ordering is required, then records will be sorted with Spark's `ExternalSorter`, + // which requires a defensive copy. + true + } else if (sortBasedShuffleOn) { + // Spark's sort-based shuffle also uses `ExternalSorter` to buffer records in memory. 
+ // However, there are two special cases where we can avoid the copy, described below: + if (partitioner.numPartitions <= bypassMergeThreshold) { + // If the number of output partitions is sufficiently small, then Spark will fall back to + // the old hash-based shuffle write path which doesn't buffer deserialized records. + // Note that we'll have to remove this case if we fix SPARK-6026 and remove this bypass. + false + } else if (serializeMapOutputs && serializer.supportsRelocationOfSerializedObjects) { + // SPARK-4550 extended sort-based shuffle to serialize individual records prior to sorting + // them. This optimization is guarded by a feature-flag and is only applied in cases where + // shuffle dependency does not specify an ordering and the record serializer has certain + // properties. If this optimization is enabled, we can safely avoid the copy. + false + } else { + // None of the special cases held, so we must copy. + true + } + } else { + // We're using hash-based shuffle, so we don't need to copy. + false + } + } private val keyOrdering = { if (newOrdering.nonEmpty) { @@ -81,7 +132,7 @@ case class Exchange( @transient private lazy val sparkConf = child.sqlContext.sparkContext.getConf - def serializer( + private def getSerializer( keySchema: Array[DataType], valueSchema: Array[DataType], hasKeyOrdering: Boolean, @@ -112,17 +163,12 @@ case class Exchange( protected override def doExecute(): RDD[Row] = attachTree(this , "execute") { newPartitioning match { case HashPartitioning(expressions, numPartitions) => - // TODO: Eliminate redundant expressions in grouping key and value. - // This is a workaround for SPARK-4479. When: - // 1. sort based shuffle is on, and - // 2. the partition number is under the merge threshold, and - // 3. no ordering is required - // we can avoid the defensive copies to improve performance. In the long run, we probably - // want to include information in shuffle dependencies to indicate whether elements in the - // source RDD should be copied. 
- val willMergeSort = sortBasedShuffleOn && numPartitions > bypassMergeThreshold - - val rdd = if (willMergeSort || newOrdering.nonEmpty) { + val keySchema = expressions.map(_.dataType).toArray + val valueSchema = child.output.map(_.dataType).toArray + val serializer = getSerializer(keySchema, valueSchema, newOrdering.nonEmpty, numPartitions) + val part = new HashPartitioner(numPartitions) + + val rdd = if (needToCopyObjectsBeforeShuffle(part, serializer)) { child.execute().mapPartitions { iter => val hashExpressions = newMutableProjection(expressions, child.output)() iter.map(r => (hashExpressions(r).copy(), r.copy())) @@ -134,52 +180,52 @@ case class Exchange( iter.map(r => mutablePair.update(hashExpressions(r), r)) } } - val part = new HashPartitioner(numPartitions) - val shuffled = - if (newOrdering.nonEmpty) { - new ShuffledRDD[Row, Row, Row](rdd, part).setKeyOrdering(keyOrdering) - } else { - new ShuffledRDD[Row, Row, Row](rdd, part) - } - val keySchema = expressions.map(_.dataType).toArray - val valueSchema = child.output.map(_.dataType).toArray - shuffled.setSerializer( - serializer(keySchema, valueSchema, newOrdering.nonEmpty, numPartitions)) - + val shuffled = new ShuffledRDD[Row, Row, Row](rdd, part) + if (newOrdering.nonEmpty) { + shuffled.setKeyOrdering(keyOrdering) + } + shuffled.setSerializer(serializer) shuffled.map(_._2) case RangePartitioning(sortingExpressions, numPartitions) => - val rdd = if (sortBasedShuffleOn || newOrdering.nonEmpty) { - child.execute().mapPartitions { iter => iter.map(row => (row.copy(), null))} - } else { - child.execute().mapPartitions { iter => - val mutablePair = new MutablePair[Row, Null](null, null) - iter.map(row => mutablePair.update(row, null)) + val keySchema = child.output.map(_.dataType).toArray + val serializer = getSerializer(keySchema, null, newOrdering.nonEmpty, numPartitions) + + val childRdd = child.execute() + val part: Partitioner = { + // Internally, RangePartitioner runs a job on the RDD that samples keys to compute + // partition bounds. To get accurate samples, we need to copy the mutable keys. + val rddForSampling = childRdd.mapPartitions { iter => + val mutablePair = new MutablePair[Row, Null]() + iter.map(row => mutablePair.update(row.copy(), null)) } + // TODO: RangePartitioner should take an Ordering. + implicit val ordering = new RowOrdering(sortingExpressions, child.output) + new RangePartitioner(numPartitions, rddForSampling, ascending = true) } - // TODO: RangePartitioner should take an Ordering. 
- implicit val ordering = new RowOrdering(sortingExpressions, child.output) - - val part = new RangePartitioner(numPartitions, rdd, ascending = true) - val shuffled = - if (newOrdering.nonEmpty) { - new ShuffledRDD[Row, Null, Null](rdd, part).setKeyOrdering(keyOrdering) - } else { - new ShuffledRDD[Row, Null, Null](rdd, part) + val rdd = if (needToCopyObjectsBeforeShuffle(part, serializer)) { + childRdd.mapPartitions { iter => iter.map(row => (row.copy(), null))} + } else { + childRdd.mapPartitions { iter => + val mutablePair = new MutablePair[Row, Null]() + iter.map(row => mutablePair.update(row, null)) } - val keySchema = child.output.map(_.dataType).toArray - shuffled.setSerializer( - serializer(keySchema, null, newOrdering.nonEmpty, numPartitions)) + } + val shuffled = new ShuffledRDD[Row, Null, Null](rdd, part) + if (newOrdering.nonEmpty) { + shuffled.setKeyOrdering(keyOrdering) + } + shuffled.setSerializer(serializer) shuffled.map(_._1) case SinglePartition => - // SPARK-4479: Can't turn off defensive copy as what we do for `HashPartitioning`, since - // operators like `TakeOrdered` may require an ordering within the partition, and currently - // `SinglePartition` doesn't include ordering information. - // TODO Add `SingleOrderedPartition` for operators like `TakeOrdered` - val rdd = if (sortBasedShuffleOn) { + val valueSchema = child.output.map(_.dataType).toArray + val serializer = getSerializer(null, valueSchema, hasKeyOrdering = false, 1) + val partitioner = new HashPartitioner(1) + + val rdd = if (needToCopyObjectsBeforeShuffle(partitioner, serializer)) { child.execute().mapPartitions { iter => iter.map(r => (null, r.copy())) } } else { child.execute().mapPartitions { iter => @@ -187,10 +233,8 @@ case class Exchange( iter.map(r => mutablePair.update(null, r)) } } - val partitioner = new HashPartitioner(1) val shuffled = new ShuffledRDD[Null, Row, Row](rdd, partitioner) - val valueSchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(null, valueSchema, false, 1)) + shuffled.setSerializer(serializer) shuffled.map(_._2) case _ => sys.error(s"Exchange not implemented for $newPartitioning") From 86ef4cfd436867d88bdc211f76d6ea668d474558 Mon Sep 17 00:00:00 2001 From: DB Tsai <dbt@netflix.com> Date: Fri, 8 May 2015 21:43:05 -0700 Subject: [PATCH 061/320] [SPARK-7262] [ML] Binary LogisticRegression with L1/L2 (elastic net) using OWLQN in new ML package 1) Handle scaling and addBias internally. 2) L1/L2 elasticnet using OWLQN optimizer. Author: DB Tsai <dbt@netflix.com> Closes #5967 from dbtsai/lor and squashes the following commits: fa029bb [DB Tsai] made the bound smaller 0806002 [DB Tsai] better initial intercept and more test 5c31824 [DB Tsai] fix import c387e25 [DB Tsai] Merge branch 'master' into lor c84e931 [DB Tsai] Made MultiClassSummarizer private f98e711 [DB Tsai] address feedback a784321 [DB Tsai] fix style 8ec65d2 [DB Tsai] remove new line f3f8c88 [DB Tsai] add more tests and they match R which is good. 
fix a bug 34705bc [DB Tsai] first commit --- .../classification/LogisticRegression.scala | 436 +++++++++++++++++- .../ml/regression/LinearRegression.scala | 18 +- .../org/apache/spark/mllib/util/MLUtils.scala | 2 +- .../LogisticRegressionSuite.scala | 395 +++++++++++++++- .../LogisticRegressionSuite.scala | 10 +- 5 files changed, 821 insertions(+), 40 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 550369d18cfec..647226a0d1a6e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -17,22 +17,31 @@ package org.apache.spark.ml.classification +import scala.collection.mutable + +import breeze.linalg.{norm => brzNorm, DenseVector => BDV} +import breeze.optimize.{LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} +import breeze.optimize.{CachedDiffFunction, DiffFunction} + import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.linalg.BLAS._ +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import org.apache.spark.storage.StorageLevel +import org.apache.spark.{SparkException, Logging} /** * Params for logistic regression. */ private[classification] trait LogisticRegressionParams extends ProbabilisticClassifierParams - with HasRegParam with HasMaxIter with HasFitIntercept with HasThreshold { - - setDefault(regParam -> 0.1, maxIter -> 100, threshold -> 0.5) -} + with HasRegParam with HasElasticNetParam with HasMaxIter with HasFitIntercept with HasTol + with HasThreshold /** * :: AlphaComponent :: @@ -43,45 +52,171 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas @AlphaComponent class LogisticRegression extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel] - with LogisticRegressionParams { + with LogisticRegressionParams with Logging { - /** @group setParam */ + /** + * Set the regularization parameter. + * Default is 0.0. + * @group setParam + */ def setRegParam(value: Double): this.type = set(regParam, value) + setDefault(regParam -> 0.0) - /** @group setParam */ + /** + * Set the ElasticNet mixing parameter. + * For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty. + * For 0 < alpha < 1, the penalty is a combination of L1 and L2. + * Default is 0.0 which is an L2 penalty. + * @group setParam + */ + def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) + setDefault(elasticNetParam -> 0.0) + + /** + * Set the maximal number of iterations. + * Default is 100. + * @group setParam + */ def setMaxIter(value: Int): this.type = set(maxIter, value) + setDefault(maxIter -> 100) + + /** + * Set the convergence tolerance of iterations. + * Smaller value will lead to higher accuracy with the cost of more iterations. + * Default is 1E-6. 
+ * @group setParam + */ + def setTol(value: Double): this.type = set(tol, value) + setDefault(tol -> 1E-6) /** @group setParam */ def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) + setDefault(fitIntercept -> true) /** @group setParam */ def setThreshold(value: Double): this.type = set(threshold, value) + setDefault(threshold -> 0.5) override protected def train(dataset: DataFrame): LogisticRegressionModel = { // Extract columns from data. If dataset is persisted, do not persist oldDataset. - val oldDataset = extractLabeledPoints(dataset) + val instances = extractLabeledPoints(dataset).map { + case LabeledPoint(label: Double, features: Vector) => (label, features) + } val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE - if (handlePersistence) { - oldDataset.persist(StorageLevel.MEMORY_AND_DISK) + if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) + + val (summarizer, labelSummarizer) = instances.treeAggregate( + (new MultivariateOnlineSummarizer, new MultiClassSummarizer))( + seqOp = (c, v) => (c, v) match { + case ((summarizer: MultivariateOnlineSummarizer, labelSummarizer: MultiClassSummarizer), + (label: Double, features: Vector)) => + (summarizer.add(features), labelSummarizer.add(label)) + }, + combOp = (c1, c2) => (c1, c2) match { + case ((summarizer1: MultivariateOnlineSummarizer, + classSummarizer1: MultiClassSummarizer), (summarizer2: MultivariateOnlineSummarizer, + classSummarizer2: MultiClassSummarizer)) => + (summarizer1.merge(summarizer2), classSummarizer1.merge(classSummarizer2)) + }) + + val histogram = labelSummarizer.histogram + val numInvalid = labelSummarizer.countInvalid + val numClasses = histogram.length + val numFeatures = summarizer.mean.size + + if (numInvalid != 0) { + val msg = s"Classification labels should be in {0 to ${numClasses - 1} " + + s"Found $numInvalid invalid labels." + logError(msg) + throw new SparkException(msg) + } + + if (numClasses > 2) { + val msg = s"Currently, LogisticRegression with ElasticNet in ML package only supports " + + s"binary classification. Found $numClasses in the input dataset." 
+ logError(msg) + throw new SparkException(msg) } - // Train model - val lr = new LogisticRegressionWithLBFGS() - .setIntercept($(fitIntercept)) - lr.optimizer - .setRegParam($(regParam)) - .setNumIterations($(maxIter)) - val oldModel = lr.run(oldDataset) - val lrm = new LogisticRegressionModel(this, oldModel.weights, oldModel.intercept) + val featuresMean = summarizer.mean.toArray + val featuresStd = summarizer.variance.toArray.map(math.sqrt) - if (handlePersistence) { - oldDataset.unpersist() + val regParamL1 = $(elasticNetParam) * $(regParam) + val regParamL2 = (1.0 - $(elasticNetParam)) * $(regParam) + + val costFun = new LogisticCostFun(instances, numClasses, $(fitIntercept), + featuresStd, featuresMean, regParamL2) + + val optimizer = if ($(elasticNetParam) == 0.0 || $(regParam) == 0.0) { + new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) + } else { + // Remove the L1 penalization on the intercept + def regParamL1Fun = (index: Int) => { + if (index == numFeatures) 0.0 else regParamL1 + } + new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol)) } - copyValues(lrm) + + val initialWeightsWithIntercept = + Vectors.zeros(if ($(fitIntercept)) numFeatures + 1 else numFeatures) + + if ($(fitIntercept)) { + /** + * For binary logistic regression, when we initialize the weights as zeros, + * it will converge faster if we initialize the intercept such that + * it follows the distribution of the labels. + * + * {{{ + * P(0) = 1 / (1 + \exp(b)), and + * P(1) = \exp(b) / (1 + \exp(b)) + * }}}, hence + * {{{ + * b = \log{P(1) / P(0)} = \log{count_1 / count_0} + * }}} + */ + initialWeightsWithIntercept.toArray(numFeatures) + = Math.log(histogram(1).toDouble / histogram(0).toDouble) + } + + val states = optimizer.iterations(new CachedDiffFunction(costFun), + initialWeightsWithIntercept.toBreeze.toDenseVector) + + var state = states.next() + val lossHistory = mutable.ArrayBuilder.make[Double] + + while (states.hasNext) { + lossHistory += state.value + state = states.next() + } + lossHistory += state.value + + // The weights are trained in the scaled space; we're converting them back to + // the original space. + val weightsWithIntercept = { + val rawWeights = state.x.toArray.clone() + var i = 0 + // Note that the intercept in scaled space and original space is the same; + // as a result, no scaling is needed. + while (i < numFeatures) { + rawWeights(i) *= { if (featuresStd(i) != 0.0) 1.0 / featuresStd(i) else 0.0 } + i += 1 + } + Vectors.dense(rawWeights) + } + + if (handlePersistence) instances.unpersist() + + val (weights, intercept) = if ($(fitIntercept)) { + (Vectors.dense(weightsWithIntercept.toArray.slice(0, weightsWithIntercept.size - 1)), + weightsWithIntercept(weightsWithIntercept.size - 1)) + } else { + (weightsWithIntercept, 0.0) + } + + new LogisticRegressionModel(this, weights.compressed, intercept) } } - /** * :: AlphaComponent :: * @@ -159,3 +294,258 @@ class LogisticRegressionModel private[ml] ( if (probability(1) > getThreshold) 1 else 0 } } + +/** + * MultiClassSummarizer computes the number of distinct labels and corresponding counts, + * and validates the data to see if the labels used for k class multi-label classification + * are in the range of {0, 1, ..., k - 1} in a online fashion. + * + * Two MultilabelSummarizer can be merged together to have a statistical summary of the + * corresponding joint dataset. 
+ */ +private[classification] class MultiClassSummarizer extends Serializable { + private val distinctMap = new mutable.HashMap[Int, Long] + private var totalInvalidCnt: Long = 0L + + /** + * Add a new label into this MultilabelSummarizer, and update the distinct map. + * @param label The label for this data point. + * @return This MultilabelSummarizer + */ + def add(label: Double): this.type = { + if (label - label.toInt != 0.0 || label < 0) { + totalInvalidCnt += 1 + this + } + else { + val counts: Long = distinctMap.getOrElse(label.toInt, 0L) + distinctMap.put(label.toInt, counts + 1) + this + } + } + + /** + * Merge another MultilabelSummarizer, and update the distinct map. + * (Note that it will merge the smaller distinct map into the larger one using in-place + * merging, so either `this` or `other` object will be modified and returned.) + * + * @param other The other MultilabelSummarizer to be merged. + * @return Merged MultilabelSummarizer object. + */ + def merge(other: MultiClassSummarizer): MultiClassSummarizer = { + val (largeMap, smallMap) = if (this.distinctMap.size > other.distinctMap.size) { + (this, other) + } else { + (other, this) + } + smallMap.distinctMap.foreach { + case (key, value) => + val counts = largeMap.distinctMap.getOrElse(key, 0L) + largeMap.distinctMap.put(key, counts + value) + } + largeMap.totalInvalidCnt += smallMap.totalInvalidCnt + largeMap + } + + /** @return The total invalid input counts. */ + def countInvalid: Long = totalInvalidCnt + + /** @return The number of distinct labels in the input dataset. */ + def numClasses: Int = distinctMap.keySet.max + 1 + + /** @return The counts of each label in the input dataset. */ + def histogram: Array[Long] = { + val result = Array.ofDim[Long](numClasses) + var i = 0 + while (i < result.length) { + result(i) = distinctMap.getOrElse(i, 0L) + i += 1 + } + result + } +} + +/** + * LogisticAggregator computes the gradient and loss for binary logistic loss function, as used + * in binary classification for samples in sparse or dense vector in a online fashion. + * + * Note that multinomial logistic loss is not supported yet! + * + * Two LogisticAggregator can be merged together to have a summary of loss and gradient of + * the corresponding joint dataset. + * + * @param weights The weights/coefficients corresponding to the features. + * @param numClasses the number of possible outcomes for k classes classification problem in + * Multinomial Logistic Regression. + * @param fitIntercept Whether to fit an intercept term. + * @param featuresStd The standard deviation values of the features. + * @param featuresMean The mean values of the features. + */ +private class LogisticAggregator( + weights: Vector, + numClasses: Int, + fitIntercept: Boolean, + featuresStd: Array[Double], + featuresMean: Array[Double]) extends Serializable { + + private var totalCnt: Long = 0L + private var lossSum = 0.0 + + private val weightsArray = weights match { + case dv: DenseVector => dv.values + case _ => + throw new IllegalArgumentException( + s"weights only supports dense vector but got type ${weights.getClass}.") + } + + private val dim = if (fitIntercept) weightsArray.length - 1 else weightsArray.length + + private val gradientSumArray = Array.ofDim[Double](weightsArray.length) + + /** + * Add a new training data to this LogisticAggregator, and update the loss and gradient + * of the objective function. + * + * @param label The label for this data point. 
+ * @param data The features for one data point in dense/sparse vector format to be added + * into this aggregator. + * @return This LogisticAggregator object. + */ + def add(label: Double, data: Vector): this.type = { + require(dim == data.size, s"Dimensions mismatch when adding new sample." + + s" Expecting $dim but got ${data.size}.") + + val dataSize = data.size + + val localWeightsArray = weightsArray + val localGradientSumArray = gradientSumArray + + numClasses match { + case 2 => + /** + * For Binary Logistic Regression. + */ + val margin = - { + var sum = 0.0 + data.foreachActive { (index, value) => + if (featuresStd(index) != 0.0 && value != 0.0) { + sum += localWeightsArray(index) * (value / featuresStd(index)) + } + } + sum + { if (fitIntercept) localWeightsArray(dim) else 0.0 } + } + + val multiplier = (1.0 / (1.0 + math.exp(margin))) - label + + data.foreachActive { (index, value) => + if (featuresStd(index) != 0.0 && value != 0.0) { + localGradientSumArray(index) += multiplier * (value / featuresStd(index)) + } + } + + if (fitIntercept) { + localGradientSumArray(dim) += multiplier + } + + if (label > 0) { + // The following is equivalent to log(1 + exp(margin)) but more numerically stable. + lossSum += MLUtils.log1pExp(margin) + } else { + lossSum += MLUtils.log1pExp(margin) - margin + } + case _ => + new NotImplementedError("LogisticRegression with ElasticNet in ML package only supports " + + "binary classification for now.") + } + totalCnt += 1 + this + } + + /** + * Merge another LogisticAggregator, and update the loss and gradient + * of the objective function. + * (Note that it's in place merging; as a result, `this` object will be modified.) + * + * @param other The other LogisticAggregator to be merged. + * @return This LogisticAggregator object. + */ + def merge(other: LogisticAggregator): this.type = { + require(dim == other.dim, s"Dimensions mismatch when merging with another " + + s"LeastSquaresAggregator. Expecting $dim but got ${other.dim}.") + + if (other.totalCnt != 0) { + totalCnt += other.totalCnt + lossSum += other.lossSum + + var i = 0 + val localThisGradientSumArray = this.gradientSumArray + val localOtherGradientSumArray = other.gradientSumArray + while (i < localThisGradientSumArray.length) { + localThisGradientSumArray(i) += localOtherGradientSumArray(i) + i += 1 + } + } + this + } + + def count: Long = totalCnt + + def loss: Double = lossSum / totalCnt + + def gradient: Vector = { + val result = Vectors.dense(gradientSumArray.clone()) + scal(1.0 / totalCnt, result) + result + } +} + +/** + * LogisticCostFun implements Breeze's DiffFunction[T] for a multinomial logistic loss function, + * as used in multi-class classification (it is also used in binary logistic regression). + * It returns the loss and gradient with L2 regularization at a particular point (weights). + * It's used in Breeze's convex optimization routines. 
+ */ +private class LogisticCostFun( + data: RDD[(Double, Vector)], + numClasses: Int, + fitIntercept: Boolean, + featuresStd: Array[Double], + featuresMean: Array[Double], + regParamL2: Double) extends DiffFunction[BDV[Double]] { + + override def calculate(weights: BDV[Double]): (Double, BDV[Double]) = { + val w = Vectors.fromBreeze(weights) + + val logisticAggregator = data.treeAggregate(new LogisticAggregator(w, numClasses, fitIntercept, + featuresStd, featuresMean))( + seqOp = (c, v) => (c, v) match { + case (aggregator, (label, features)) => aggregator.add(label, features) + }, + combOp = (c1, c2) => (c1, c2) match { + case (aggregator1, aggregator2) => aggregator1.merge(aggregator2) + }) + + // regVal is the sum of weight squares for L2 regularization + val norm = if (regParamL2 == 0.0) { + 0.0 + } else if (fitIntercept) { + brzNorm(Vectors.dense(weights.toArray.slice(0, weights.size -1)).toBreeze, 2.0) + } else { + brzNorm(weights, 2.0) + } + val regVal = 0.5 * regParamL2 * norm * norm + + val loss = logisticAggregator.loss + regVal + val gradient = logisticAggregator.gradient + + if (fitIntercept) { + val wArray = w.toArray.clone() + wArray(wArray.length - 1) = 0.0 + axpy(regParamL2, Vectors.dense(wArray), gradient) + } else { + axpy(regParamL2, w, gradient) + } + + (loss, gradient.toBreeze.asInstanceOf[BDV[Double]]) + } +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index e63c9a3eead52..6377923afc0c4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -106,14 +106,16 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) val (summarizer, statCounter) = instances.treeAggregate( - (new MultivariateOnlineSummarizer, new StatCounter))( { - case ((summarizer: MultivariateOnlineSummarizer, statCounter: StatCounter), - (label: Double, features: Vector)) => - (summarizer.add(features), statCounter.merge(label)) - }, { - case ((summarizer1: MultivariateOnlineSummarizer, statCounter1: StatCounter), - (summarizer2: MultivariateOnlineSummarizer, statCounter2: StatCounter)) => - (summarizer1.merge(summarizer2), statCounter1.merge(statCounter2)) + (new MultivariateOnlineSummarizer, new StatCounter))( + seqOp = (c, v) => (c, v) match { + case ((summarizer: MultivariateOnlineSummarizer, statCounter: StatCounter), + (label: Double, features: Vector)) => + (summarizer.add(features), statCounter.merge(label)) + }, + combOp = (c1, c2) => (c1, c2) match { + case ((summarizer1: MultivariateOnlineSummarizer, statCounter1: StatCounter), + (summarizer2: MultivariateOnlineSummarizer, statCounter2: StatCounter)) => + (summarizer1.merge(summarizer2), statCounter1.merge(statCounter2)) }) val numFeatures = summarizer.mean.size diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index 5d6ddd47f67d6..681f4c618d302 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -331,7 +331,7 @@ object MLUtils { * @param x a floating-point value as input. * @return the result of `math.log(1 + math.exp(x))`. 
*/ - private[mllib] def log1pExp(x: Double): Double = { + private[spark] def log1pExp(x: Double): Double = { if (x > 0) { x + math.log1p(math.exp(-x)) } else { diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 6dd1fdf05514e..78cdd471859b7 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.classification import org.scalatest.FunSuite -import org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInput +import org.apache.spark.mllib.classification.LogisticRegressionSuite import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ @@ -30,13 +30,42 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { @transient var sqlContext: SQLContext = _ @transient var dataset: DataFrame = _ + @transient var binaryDataset: DataFrame = _ private val eps: Double = 1e-5 override def beforeAll(): Unit = { super.beforeAll() sqlContext = new SQLContext(sc) - dataset = sqlContext.createDataFrame( - sc.parallelize(generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42), 2)) + + dataset = sqlContext.createDataFrame(sc.parallelize(LogisticRegressionSuite + .generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42), 4)) + + /** + * Here is the instruction describing how to export the test data into CSV format + * so we can validate the training accuracy compared with R's glmnet package. + * + * import org.apache.spark.mllib.classification.LogisticRegressionSuite + * val nPoints = 10000 + * val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191) + * val xMean = Array(5.843, 3.057, 3.758, 1.199) + * val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) + * val data = sc.parallelize(LogisticRegressionSuite.generateMultinomialLogisticInput( + * weights, xMean, xVariance, true, nPoints, 42), 1) + * data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1) + ", " + * + x.features(2) + ", " + x.features(3)).saveAsTextFile("path") + */ + binaryDataset = { + val nPoints = 10000 + val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191) + val xMean = Array(5.843, 3.057, 3.758, 1.199) + val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) + + val testData = LogisticRegressionSuite.generateMultinomialLogisticInput( + weights, xMean, xVariance, true, nPoints, 42) + + sqlContext.createDataFrame(sc.parallelize(LogisticRegressionSuite + .generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42), 4)) + } } test("logistic regression: default params") { @@ -135,4 +164,364 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { assert(pred == predFromProb) } } + + test("MultiClassSummarizer") { + val summarizer1 = (new MultiClassSummarizer) + .add(0.0).add(3.0).add(4.0).add(3.0).add(6.0) + assert(summarizer1.histogram.zip(Array[Long](1, 0, 0, 2, 1, 0, 1)).forall(x => x._1 === x._2)) + assert(summarizer1.countInvalid === 0) + assert(summarizer1.numClasses === 7) + + val summarizer2 = (new MultiClassSummarizer) + .add(1.0).add(5.0).add(3.0).add(0.0).add(4.0).add(1.0) + assert(summarizer2.histogram.zip(Array[Long](1, 2, 0, 1, 1, 1)).forall(x => x._1 === x._2)) + assert(summarizer2.countInvalid === 
0) + assert(summarizer2.numClasses === 6) + + val summarizer3 = (new MultiClassSummarizer) + .add(0.0).add(1.3).add(5.2).add(2.5).add(2.0).add(4.0).add(4.0).add(4.0).add(1.0) + assert(summarizer3.histogram.zip(Array[Long](1, 1, 1, 0, 3)).forall(x => x._1 === x._2)) + assert(summarizer3.countInvalid === 3) + assert(summarizer3.numClasses === 5) + + val summarizer4 = (new MultiClassSummarizer) + .add(3.1).add(4.3).add(2.0).add(1.0).add(3.0) + assert(summarizer4.histogram.zip(Array[Long](0, 1, 1, 1)).forall(x => x._1 === x._2)) + assert(summarizer4.countInvalid === 2) + assert(summarizer4.numClasses === 4) + + // small map merges large one + val summarizerA = summarizer1.merge(summarizer2) + assert(summarizerA.hashCode() === summarizer2.hashCode()) + assert(summarizerA.histogram.zip(Array[Long](2, 2, 0, 3, 2, 1, 1)).forall(x => x._1 === x._2)) + assert(summarizerA.countInvalid === 0) + assert(summarizerA.numClasses === 7) + + // large map merges small one + val summarizerB = summarizer3.merge(summarizer4) + assert(summarizerB.hashCode() === summarizer3.hashCode()) + assert(summarizerB.histogram.zip(Array[Long](1, 2, 2, 1, 3)).forall(x => x._1 === x._2)) + assert(summarizerB.countInvalid === 5) + assert(summarizerB.numClasses === 5) + } + + test("binary logistic regression with intercept without regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) 2.8366423 + * data.V2 -0.5895848 + * data.V3 0.8931147 + * data.V4 -0.3925051 + * data.V5 -0.7996864 + */ + val interceptR = 2.8366423 + val weightsR = Array(-0.5895848, 0.8931147, -0.3925051, -0.7996864) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression without intercept without regularization") { + val trainer = (new LogisticRegression).setFitIntercept(false) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = + * coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0, intercept=FALSE)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) . 
+ * data.V2 -0.3534996 + * data.V3 1.2964482 + * data.V4 -0.3571741 + * data.V5 -0.7407946 + */ + val interceptR = 0.0 + val weightsR = Array(-0.3534996, 1.2964482, -0.3571741, -0.7407946) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-2) + assert(model.weights(1) ~== weightsR(1) relTol 1E-2) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression with intercept with L1 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(1.0).setRegParam(0.12) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) -0.05627428 + * data.V2 . + * data.V3 . + * data.V4 -0.04325749 + * data.V5 -0.02481551 + */ + val interceptR = -0.05627428 + val weightsR = Array(0.0, 0.0, -0.04325749, -0.02481551) + + assert(model.intercept ~== interceptR relTol 1E-2) + assert(model.weights(0) ~== weightsR(0) relTol 1E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 1E-2) + assert(model.weights(3) ~== weightsR(3) relTol 2E-2) + } + + test("binary logistic regression without intercept with L1 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(1.0).setRegParam(0.12) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12, + * intercept=FALSE)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) . + * data.V2 . + * data.V3 . + * data.V4 -0.05189203 + * data.V5 -0.03891782 + */ + val interceptR = 0.0 + val weightsR = Array(0.0, 0.0, -0.05189203, -0.03891782) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 1E-2) + assert(model.weights(3) ~== weightsR(3) relTol 1E-2) + } + + test("binary logistic regression with intercept with L2 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.0).setRegParam(1.37) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. 
+ * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) 0.15021751 + * data.V2 -0.07251837 + * data.V3 0.10724191 + * data.V4 -0.04865309 + * data.V5 -0.10062872 + */ + val interceptR = 0.15021751 + val weightsR = Array(-0.07251837, 0.10724191, -0.04865309, -0.10062872) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression without intercept with L2 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.0).setRegParam(1.37) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37, + * intercept=FALSE)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) . + * data.V2 -0.06099165 + * data.V3 0.12857058 + * data.V4 -0.04708770 + * data.V5 -0.09799775 + */ + val interceptR = 0.0 + val weightsR = Array(-0.06099165, 0.12857058, -0.04708770, -0.09799775) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-2) + assert(model.weights(1) ~== weightsR(1) relTol 1E-2) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression with intercept with ElasticNet regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.38).setRegParam(0.21) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) 0.57734851 + * data.V2 -0.05310287 + * data.V3 . + * data.V4 -0.08849250 + * data.V5 -0.15458796 + */ + val interceptR = 0.57734851 + val weightsR = Array(-0.05310287, 0.0, -0.08849250, -0.15458796) + + assert(model.intercept ~== interceptR relTol 6E-3) + assert(model.weights(0) ~== weightsR(0) relTol 5E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 5E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression without intercept with ElasticNet regularization") { + val trainer = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.38).setRegParam(0.21) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. 
+ * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21, + * intercept=FALSE)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) . + * data.V2 -0.001005743 + * data.V3 0.072577857 + * data.V4 -0.081203769 + * data.V5 -0.142534158 + */ + val interceptR = 0.0 + val weightsR = Array(-0.001005743, 0.072577857, -0.081203769, -0.142534158) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) absTol 1E-3) + assert(model.weights(1) ~== weightsR(1) absTol 1E-2) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-2) + } + + test("binary logistic regression with intercept with strong L1 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(1.0).setRegParam(6.0) + val model = trainer.fit(binaryDataset) + + val histogram = binaryDataset.map { case Row(label: Double, features: Vector) => label } + .treeAggregate(new MultiClassSummarizer)( + seqOp = (c, v) => (c, v) match { + case (classSummarizer: MultiClassSummarizer, label: Double) => classSummarizer.add(label) + }, + combOp = (c1, c2) => (c1, c2) match { + case (classSummarizer1: MultiClassSummarizer, classSummarizer2: MultiClassSummarizer) => + classSummarizer1.merge(classSummarizer2) + }).histogram + + /** + * For binary logistic regression with strong L1 regularization, all the weights will be zeros. + * As a result, + * {{{ + * P(0) = 1 / (1 + \exp(b)), and + * P(1) = \exp(b) / (1 + \exp(b)) + * }}}, hence + * {{{ + * b = \log{P(1) / P(0)} = \log{count_1 / count_0} + * }}} + */ + val interceptTheory = Math.log(histogram(1).toDouble / histogram(0).toDouble) + val weightsTheory = Array(0.0, 0.0, 0.0, 0.0) + + assert(model.intercept ~== interceptTheory relTol 1E-5) + assert(model.weights(0) ~== weightsTheory(0) absTol 1E-6) + assert(model.weights(1) ~== weightsTheory(1) absTol 1E-6) + assert(model.weights(2) ~== weightsTheory(2) absTol 1E-6) + assert(model.weights(3) ~== weightsTheory(3) absTol 1E-6) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) -0.2480643 + * data.V2 0.0000000 + * data.V3 . + * data.V4 . + * data.V5 . 
+ */ + val interceptR = -0.248065 + val weightsR = Array(0.0, 0.0, 0.0, 0.0) + + assert(model.intercept ~== interceptR relTol 1E-5) + assert(model.weights(0) ~== weightsR(0) absTol 1E-6) + assert(model.weights(1) ~== weightsR(1) absTol 1E-6) + assert(model.weights(2) ~== weightsR(2) absTol 1E-6) + assert(model.weights(3) ~== weightsR(3) absTol 1E-6) + } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala index a26c52852c4d7..fb0a194718802 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala @@ -91,21 +91,21 @@ object LogisticRegressionSuite { seed: Int): Seq[LabeledPoint] = { val rnd = new Random(seed) - val xDim = xMean.size + val xDim = xMean.length val xWithInterceptsDim = if (addIntercept) xDim + 1 else xDim - val nClasses = weights.size / xWithInterceptsDim + 1 + val nClasses = weights.length / xWithInterceptsDim + 1 val x = Array.fill[Vector](nPoints)(Vectors.dense(Array.fill[Double](xDim)(rnd.nextGaussian()))) - x.map(vector => { + x.foreach { vector => // This doesn't work if `vector` is a sparse vector. val vectorArray = vector.toArray var i = 0 - while (i < vectorArray.size) { + while (i < vectorArray.length) { vectorArray(i) = vectorArray(i) * math.sqrt(xVariance(i)) + xMean(i) i += 1 } - }) + } val y = (0 until nPoints).map { idx => val xArray = x(idx).toArray From 29926238418223b0888d418d163feebf0217b35e Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" <joseph@databricks.com> Date: Fri, 8 May 2015 21:55:54 -0700 Subject: [PATCH 062/320] [SPARK-7498] [ML] removed varargs annotation from Params.setDefaults In SPARK-7429 and PR https://github.com/apache/spark/pull/5960, I added the varargs annotation to Params.setDefault which takes a variable number of ParamPairs. It worked locally and on Jenkins for me. However, mengxr reported issues compiling on his machine. So I'm reverting the change introduced in https://github.com/apache/spark/pull/5960 by removing varargs. Author: Joseph K. Bradley <joseph@databricks.com> Closes #6021 from jkbradley/revert-varargs and squashes the following commits: 098ed39 [Joseph K. Bradley] removed varargs annotation from Params.setDefaults taking multiple ParamPairs --- mllib/src/main/scala/org/apache/spark/ml/param/params.scala | 3 ++- .../test/java/org/apache/spark/ml/param/JavaTestParams.java | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 0e1b60d172e3b..7ebbf106ee753 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -387,11 +387,12 @@ trait Params extends Identifiable with Serializable { /** * Sets default values for a list of params. * + * Note: Java developers should use the single-parameter [[setDefault()]]. + * Annotating this with varargs causes compilation failures. See SPARK-7498. * @param paramPairs a list of param pairs that specify params and their default values to set * respectively. Make sure that the params are initialized before this method * gets called. 
*/ - @varargs protected final def setDefault(paramPairs: ParamPair[_]*): this.type = { paramPairs.foreach { p => setDefault(p.param.asInstanceOf[Param[Any]], p.value) diff --git a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java index 532eca47918fc..8abe575610d19 100644 --- a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java +++ b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java @@ -59,6 +59,5 @@ public JavaTestParams() { ParamValidators.inArray(validStrings)); setDefault(myIntParam, 1); setDefault(myDoubleParam, 0.5); - setDefault(myIntParam.w(1), myDoubleParam.w(0.5)); } } From dda6d9f4045fa2d1265abffa9d7dbdc967448417 Mon Sep 17 00:00:00 2001 From: Vinod K C <vinod.kc@huawei.com> Date: Sat, 9 May 2015 10:03:15 +0100 Subject: [PATCH 063/320] [SPARK-7438] [SPARK CORE] Fixed validation of relativeSD in countApproxDistinct Author: Vinod K C <vinod.kc@huawei.com> Closes #5974 from vinodkc/fix_countApproxDistinct_Validation and squashes the following commits: 3a3d59c [Vinod K C] Reverted removal of validation relativeSD<0.000017 799976e [Vinod K C] Removed testcase to assert IAE when relativeSD>3.7 8ddbfae [Vinod K C] Remove blank line b1b00a3 [Vinod K C] Removed relativeSD validation from python API,RDD.scala will do validation 122d378 [Vinod K C] Fixed validation of relativeSD in countApproxDistinct --- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 7 ++++--- core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala | 2 ++ python/pyspark/rdd.py | 2 -- python/pyspark/tests.py | 1 - 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 8baf199f215fb..7dad30ecbdd2f 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1161,8 +1161,8 @@ abstract class RDD[T: ClassTag]( */ @Experimental def countApproxDistinct(p: Int, sp: Int): Long = withScope { - require(p >= 4, s"p ($p) must be at least 4") - require(sp <= 32, s"sp ($sp) cannot be greater than 32") + require(p >= 4, s"p ($p) must be >= 4") + require(sp <= 32, s"sp ($sp) must be <= 32") require(sp == 0 || p <= sp, s"p ($p) cannot be greater than sp ($sp)") val zeroCounter = new HyperLogLogPlus(p, sp) aggregate(zeroCounter)( @@ -1187,8 +1187,9 @@ abstract class RDD[T: ClassTag]( * It must be greater than 0.000017. 
*/ def countApproxDistinct(relativeSD: Double = 0.05): Long = withScope { + require(relativeSD > 0.000017, s"accuracy ($relativeSD) must be greater than 0.000017") val p = math.ceil(2.0 * math.log(1.054 / relativeSD) / math.log(2)).toInt - countApproxDistinct(p, 0) + countApproxDistinct(if (p < 4) 4 else p, 0) } /** diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index ef8c36a28655b..afc11bdc4d6ab 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -89,6 +89,8 @@ class RDDSuite extends FunSuite with SharedSparkContext { val simpleRdd = sc.makeRDD(uniformDistro, 10) assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.2) assert(error(simpleRdd.countApproxDistinct(12, 0), size) < 0.1) + assert(error(simpleRdd.countApproxDistinct(0.02), size) < 0.1) + assert(error(simpleRdd.countApproxDistinct(0.5), size) < 0.22) } test("SparkContext.union") { diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index d254deb527d10..545c5ad20cb96 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -2239,8 +2239,6 @@ def countApproxDistinct(self, relativeSD=0.05): """ if relativeSD < 0.000017: raise ValueError("relativeSD should be greater than 0.000017") - if relativeSD > 0.37: - raise ValueError("relativeSD should be smaller than 0.37") # the hash space in Java is 2^32 hashRDD = self.map(lambda x: portable_hash(x) & 0xFFFFFFFF) return hashRDD._to_java_object_rdd().countApproxDistinct(relativeSD) diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index ea63a396da5b8..09de4d159fdcf 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -644,7 +644,6 @@ def test_count_approx_distinct(self): self.assertTrue(18 < rdd.map(lambda x: (x, -x)).countApproxDistinct() < 22) self.assertRaises(ValueError, lambda: rdd.countApproxDistinct(0.00000001)) - self.assertRaises(ValueError, lambda: rdd.countApproxDistinct(0.5)) def test_histogram(self): # empty From 12b95abc7047a8f2fd25a3c8dbb9904eb305eba6 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Date: Sat, 9 May 2015 10:10:29 +0100 Subject: [PATCH 064/320] [SPARK-7403] [WEBUI] Link URL in objects on Timeline View is wrong in case of running on YARN When we use Spark on YARN and have AllJobPage via ResourceManager's proxy, the link URL in objects which represent each job on timeline view is wrong. In timeline-view.js, the link is generated as follows. ``` window.location.href = "job/?id=" + getJobId(this); ``` This assumes the URL displayed on the web browser ends with "jobs/" but when we access AllJobPage via the proxy, the url displayed does not end with "jobs/" The proxy doesn't return status code 301 or 302 so the url displayed still indicates the base url, not "/jobs" even though displaying AllJobPages. 
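To make the failure mode concrete, here is a small illustrative sketch (not part of the patch; host names, port and application id are made up). Browsers resolve `window.location.href = "job/?id=..."` with essentially the same relative-resolution rules as `java.net.URI.resolve`, so against the proxied base URL the `jobs/` segment is silently dropped:

```
import java.net.URI

object RelativeLinkDemo {
  def main(args: Array[String]): Unit = {
    // Direct UI access: the displayed URL ends with "jobs/".
    val direct = new URI("http://driver-host:4040/jobs/")
    // Access through the ResourceManager proxy: the displayed URL does not end with "jobs/".
    val proxied = new URI("http://rm-host:8088/proxy/application_1431200000000_0001")

    // Resolving the relative link built by timeline-view.js against each base URL:
    println(direct.resolve("job/?id=0"))   // http://driver-host:4040/jobs/job/?id=0  (correct page)
    println(proxied.resolve("job/?id=0"))  // http://rm-host:8088/proxy/job/?id=0     (wrong page)
  }
}
```

This is why the fix below stops constructing the path by hand and instead reuses the href that is already rendered for the corresponding entry in the job table.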
![2015-05-07 3 34 37](https://cloud.githubusercontent.com/assets/4736016/7501079/a8507ad6-f46c-11e4-9bed-62abea170f4c.png) Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Closes #5947 from sarutak/fix-link-in-timeline and squashes the following commits: aaf40e1 [Kousuke Saruta] Added Copyright for vis.js 01bee7b [Kousuke Saruta] Fixed timeline-view.js in order to get correct href --- LICENSE | 16 ++++++++++ .../apache/spark/ui/static/timeline-view.css | 3 +- .../apache/spark/ui/static/timeline-view.js | 31 +++++++------------ 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/LICENSE b/LICENSE index b2001f029a4f0..d6b9ccf07d999 100644 --- a/LICENSE +++ b/LICENSE @@ -836,6 +836,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. +======================================================================== +For vis.js (core/src/main/resources/org/apache/spark/ui/static/vis.min.js): +======================================================================== +Copyright (C) 2010-2015 Almende B.V. + +Vis.js is dual licensed under both + + * The Apache 2.0 License + http://www.apache.org/licenses/LICENSE-2.0 + +and + + * The MIT License + http://opensource.org/licenses/MIT + +Vis.js may be distributed under either license. ======================================================================== BSD-style licenses diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css index d40de704229c3..d1e6d462b836f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css @@ -19,7 +19,8 @@ div#application-timeline, div#job-timeline { margin-bottom: 30px; } -#application-timeline div.legend-area { +#application-timeline div.legend-area, +#job-timeline div.legend-area { margin-top: 5px; } diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index 48fbb33b1155b..558beb8a5867f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -39,23 +39,24 @@ function drawApplicationTimeline(groupArray, eventObjArray, startTime) { function setupJobEventAction() { $(".item.range.job.application-timeline-object").each(function() { - var getJobId = function(baseElem) { + var getSelectorForJobEntry = function(baseElem) { var jobIdText = $($(baseElem).find(".application-timeline-content")[0]).text(); var jobId = jobIdText.match("\\(Job (\\d+)\\)")[1]; - return jobId; + return "#job-" + jobId; }; $(this).click(function() { - window.location.href = "job/?id=" + getJobId(this); + var jobPagePath = $(getSelectorForJobEntry(this)).find("a").attr("href") + window.location.href = jobPagePath }); $(this).hover( function() { - $("#job-" + getJobId(this)).addClass("corresponding-item-hover"); + $(getSelectorForJobEntry(this)).addClass("corresponding-item-hover"); $($(this).find("div.application-timeline-content")[0]).tooltip("show"); }, function() { - $("#job-" + getJobId(this)).removeClass("corresponding-item-hover"); + $(getSelectorForJobEntry(this)).removeClass("corresponding-item-hover"); $($(this).find("div.application-timeline-content")[0]).tooltip("hide"); } ); @@ -97,32 +98,24 @@ function 
drawJobTimeline(groupArray, eventObjArray, startTime) { function setupStageEventAction() { $(".item.range.stage.job-timeline-object").each(function() { - var getStageIdAndAttempt = function(baseElem) { + var getSelectorForStageEntry = function(baseElem) { var stageIdText = $($(baseElem).find(".job-timeline-content")[0]).text(); var stageIdAndAttempt = stageIdText.match("\\(Stage (\\d+\\.\\d+)\\)")[1].split("."); - return stageIdAndAttempt; + return "#stage-" + stageIdAndAttempt[0] + "-" + stageIdAndAttempt[1]; }; $(this).click(function() { - var idAndAttempt = getStageIdAndAttempt(this); - var id = idAndAttempt[0]; - var attempt = idAndAttempt[1]; - window.location.href = "../../stages/stage/?id=" + id + "&attempt=" + attempt; + var stagePagePath = $(getSelectorForStageEntry(this)).find("a").attr("href") + window.location.href = stagePagePath }); $(this).hover( function() { - var idAndAttempt = getStageIdAndAttempt(this); - var id = idAndAttempt[0]; - var attempt = idAndAttempt[1]; - $("#stage-" + id + "-" + attempt).addClass("corresponding-item-hover"); + $(getSelectorForStageEntry(this)).addClass("corresponding-item-hover"); $($(this).find("div.job-timeline-content")[0]).tooltip("show"); }, function() { - var idAndAttempt = getStageIdAndAttempt(this); - var id = idAndAttempt[0]; - var attempt = idAndAttempt[1]; - $("#stage-" + id + "-" + attempt).removeClass("corresponding-item-hover"); + $(getSelectorForStageEntry(this)).removeClass("corresponding-item-hover"); $($(this).find("div.job-timeline-content")[0]).tooltip("hide"); } ); From 7d0f17208cda641651dcbd1bc0da639cd74307e7 Mon Sep 17 00:00:00 2001 From: dobashim <dobashim@oss.nttdata.co.jp> Date: Sat, 9 May 2015 10:14:46 +0100 Subject: [PATCH 065/320] [STREAMING] [DOCS] Fix wrong url about API docs of StreamingListener A little fix about wrong url of the API document. (org.apache.spark.streaming.scheduler.StreamingListener) Author: dobashim <dobashim@oss.nttdata.co.jp> Closes #6024 from dobashim/master and squashes the following commits: ac9a955 [dobashim] [STREAMING][DOCS] Fix wrong url about API docs of StreamingListener --- docs/streaming-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index 2f2fea53168a3..bd863d48d53e3 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -1915,7 +1915,7 @@ In that case, consider [reducing](#reducing-the-processing-time-of-each-batch) the batch processing time. The progress of a Spark Streaming program can also be monitored using the -[StreamingListener](api/scala/index.html#org.apache.spark.scheduler.StreamingListener) interface, +[StreamingListener](api/scala/index.html#org.apache.spark.streaming.scheduler.StreamingListener) interface, which allows you to get receiver status and processing times. Note that this is a developer API and it is likely to be improved upon (i.e., more information reported) in the future. From 3071aac387ca0b80201022c9c2f245437c77a375 Mon Sep 17 00:00:00 2001 From: tedyu <yuzhihong@gmail.com> Date: Sat, 9 May 2015 10:41:30 -0700 Subject: [PATCH 066/320] Upgrade version of jackson-databind in sql/core/pom.xml Currently version of jackson-databind in sql/core/pom.xml is 2.3.0 This is older than the version specified in root pom.xml This PR upgrades the version in sql/core/pom.xml so that they're consistent. 
Author: tedyu <yuzhihong@gmail.com> Closes #6028 from tedyu/master and squashes the following commits: 28c8394 [tedyu] Upgrade version of jackson-databind in sql/core/pom.xml --- sql/core/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/pom.xml b/sql/core/pom.xml index e3a6b1fe72435..6a7151ad10390 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -64,7 +64,7 @@ <dependency> <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-databind</artifactId> - <version>2.3.0</version> + <version>2.4.4</version> </dependency> <dependency> <groupId>org.jodd</groupId> From bd74301ff87f545e5808e13dd50dea12edd3db92 Mon Sep 17 00:00:00 2001 From: tedyu <yuzhihong@gmail.com> Date: Sat, 9 May 2015 13:19:07 -0700 Subject: [PATCH 067/320] [BUILD] Reference fasterxml.jackson.version in sql/core/pom.xml Author: tedyu <yuzhihong@gmail.com> Closes #6031 from tedyu/master and squashes the following commits: 5c2580c [tedyu] Reference fasterxml.jackson.version in sql/core/pom.xml ff2a44f [tedyu] Merge branch 'master' of github.com:apache/spark 28c8394 [tedyu] Upgrade version of jackson-databind in sql/core/pom.xml --- sql/core/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 6a7151ad10390..7d274a73e079f 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -64,7 +64,7 @@ <dependency> <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-databind</artifactId> - <version>2.4.4</version> + <version>${fasterxml.jackson.version}</version> </dependency> <dependency> <groupId>org.jodd</groupId> From b13162b364aeff35e3bdeea9c9a31e5ce66f8c9a Mon Sep 17 00:00:00 2001 From: Yuhao Yang <hhbyyh@gmail.com> Date: Sat, 9 May 2015 15:40:46 -0700 Subject: [PATCH 068/320] [SPARK-7475] [MLLIB] adjust ldaExample for online LDA jira: https://issues.apache.org/jira/browse/SPARK-7475 Add a new argument to specify the algorithm applied to LDA, to exhibit the basic usage of LDAOptimizer. cc jkbradley Author: Yuhao Yang <hhbyyh@gmail.com> Closes #6000 from hhbyyh/ldaExample and squashes the following commits: 0a7e2bc [Yuhao Yang] fix according to comments 5810b0f [Yuhao Yang] adjust ldaExample for online LDA --- .../spark/examples/mllib/LDAExample.scala | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index a1850390c0a86..31d629f853161 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -26,7 +26,7 @@ import scopt.OptionParser import org.apache.log4j.{Level, Logger} import org.apache.spark.{SparkContext, SparkConf} -import org.apache.spark.mllib.clustering.{DistributedLDAModel, LDA} +import org.apache.spark.mllib.clustering.{EMLDAOptimizer, OnlineLDAOptimizer, DistributedLDAModel, LDA} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD @@ -48,6 +48,7 @@ object LDAExample { topicConcentration: Double = -1, vocabSize: Int = 10000, stopwordFile: String = "", + algorithm: String = "em", checkpointDir: Option[String] = None, checkpointInterval: Int = 10) extends AbstractParams[Params] @@ -78,6 +79,10 @@ object LDAExample { .text(s"filepath for a list of stopwords. Note: This must fit on a single machine." 
+ s" default: ${defaultParams.stopwordFile}") .action((x, c) => c.copy(stopwordFile = x)) + opt[String]("algorithm") + .text(s"inference algorithm to use. em and online are supported." + + s" default: ${defaultParams.algorithm}") + .action((x, c) => c.copy(algorithm = x)) opt[String]("checkpointDir") .text(s"Directory for checkpointing intermediate results." + s" Checkpointing helps with recovery and eliminates temporary shuffle files on disk." + @@ -128,7 +133,17 @@ object LDAExample { // Run LDA. val lda = new LDA() - lda.setK(params.k) + + val optimizer = params.algorithm.toLowerCase match { + case "em" => new EMLDAOptimizer + // add (1.0 / actualCorpusSize) to MiniBatchFraction be more robust on tiny datasets. + case "online" => new OnlineLDAOptimizer().setMiniBatchFraction(0.05 + 1.0 / actualCorpusSize) + case _ => throw new IllegalArgumentException( + s"Only em, online are supported but got ${params.algorithm}.") + } + + lda.setOptimizer(optimizer) + .setK(params.k) .setMaxIterations(params.maxIterations) .setDocConcentration(params.docConcentration) .setTopicConcentration(params.topicConcentration) @@ -137,14 +152,18 @@ object LDAExample { sc.setCheckpointDir(params.checkpointDir.get) } val startTime = System.nanoTime() - val ldaModel = lda.run(corpus).asInstanceOf[DistributedLDAModel] + val ldaModel = lda.run(corpus) val elapsed = (System.nanoTime() - startTime) / 1e9 println(s"Finished training LDA model. Summary:") println(s"\t Training time: $elapsed sec") - val avgLogLikelihood = ldaModel.logLikelihood / actualCorpusSize.toDouble - println(s"\t Training data average log likelihood: $avgLogLikelihood") - println() + + if (ldaModel.isInstanceOf[DistributedLDAModel]) { + val distLDAModel = ldaModel.asInstanceOf[DistributedLDAModel] + val avgLogLikelihood = distLDAModel.logLikelihood / actualCorpusSize.toDouble + println(s"\t Training data average log likelihood: $avgLogLikelihood") + println() + } // Print the topics, showing the top-weighted terms for each topic. 
val topicIndices = ldaModel.describeTopics(maxTermsPerTopic = 10) From bf7e81a51cd81706570615cd67362c86602dec88 Mon Sep 17 00:00:00 2001 From: Yanbo Liang <ybliang8@gmail.com> Date: Sun, 10 May 2015 00:57:14 -0700 Subject: [PATCH 069/320] [SPARK-6091] [MLLIB] Add MulticlassMetrics in PySpark/MLlib https://issues.apache.org/jira/browse/SPARK-6091 Author: Yanbo Liang <ybliang8@gmail.com> Closes #6011 from yanboliang/spark-6091 and squashes the following commits: bb3e4ba [Yanbo Liang] trigger jenkins 53c045d [Yanbo Liang] keep compatibility for python 2.6 972d5ac [Yanbo Liang] Add MulticlassMetrics in PySpark/MLlib --- .../mllib/evaluation/MulticlassMetrics.scala | 8 ++ python/pyspark/mllib/evaluation.py | 129 ++++++++++++++++++ 2 files changed, 137 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 666362ae6739a..4628dc5690913 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkContext._ import org.apache.spark.annotation.Experimental import org.apache.spark.mllib.linalg.{Matrices, Matrix} import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame /** * ::Experimental:: @@ -33,6 +34,13 @@ import org.apache.spark.rdd.RDD @Experimental class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { + /** + * An auxiliary constructor taking a DataFrame. + * @param predictionAndLabels a DataFrame with two double columns: prediction and label + */ + private[mllib] def this(predictionAndLabels: DataFrame) = + this(predictionAndLabels.map(r => (r.getDouble(0), r.getDouble(1)))) + private lazy val labelCountByClass: Map[Double, Long] = predictionAndLabels.values.countByValue() private lazy val labelCount: Long = labelCountByClass.values.sum private lazy val tpByClass: Map[Double, Int] = predictionAndLabels diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 3e11df09da6b1..36914597de228 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -141,6 +141,135 @@ def r2(self): return self.call("r2") +class MulticlassMetrics(JavaModelWrapper): + """ + Evaluator for multiclass classification. + + >>> predictionAndLabels = sc.parallelize([(0.0, 0.0), (0.0, 1.0), (0.0, 0.0), + ... (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)]) + >>> metrics = MulticlassMetrics(predictionAndLabels) + >>> metrics.falsePositiveRate(0.0) + 0.2... + >>> metrics.precision(1.0) + 0.75... + >>> metrics.recall(2.0) + 1.0... + >>> metrics.fMeasure(0.0, 2.0) + 0.52... + >>> metrics.precision() + 0.66... + >>> metrics.recall() + 0.66... + >>> metrics.weightedFalsePositiveRate + 0.19... + >>> metrics.weightedPrecision + 0.68... + >>> metrics.weightedRecall + 0.66... + >>> metrics.weightedFMeasure() + 0.66... + >>> metrics.weightedFMeasure(2.0) + 0.65... + """ + + def __init__(self, predictionAndLabels): + """ + :param predictionAndLabels an RDD of (prediction, label) pairs. 
+ """ + sc = predictionAndLabels.ctx + sql_ctx = SQLContext(sc) + df = sql_ctx.createDataFrame(predictionAndLabels, schema=StructType([ + StructField("prediction", DoubleType(), nullable=False), + StructField("label", DoubleType(), nullable=False)])) + java_class = sc._jvm.org.apache.spark.mllib.evaluation.MulticlassMetrics + java_model = java_class(df._jdf) + super(MulticlassMetrics, self).__init__(java_model) + + def truePositiveRate(self, label): + """ + Returns true positive rate for a given label (category). + """ + return self.call("truePositiveRate", label) + + def falsePositiveRate(self, label): + """ + Returns false positive rate for a given label (category). + """ + return self.call("falsePositiveRate", label) + + def precision(self, label=None): + """ + Returns precision or precision for a given label (category) if specified. + """ + if label is None: + return self.call("precision") + else: + return self.call("precision", float(label)) + + def recall(self, label=None): + """ + Returns recall or recall for a given label (category) if specified. + """ + if label is None: + return self.call("recall") + else: + return self.call("recall", float(label)) + + def fMeasure(self, label=None, beta=None): + """ + Returns f-measure or f-measure for a given label (category) if specified. + """ + if beta is None: + if label is None: + return self.call("fMeasure") + else: + return self.call("fMeasure", label) + else: + if label is None: + raise Exception("If the beta parameter is specified, label can not be none") + else: + return self.call("fMeasure", label, beta) + + @property + def weightedTruePositiveRate(self): + """ + Returns weighted true positive rate. + (equals to precision, recall and f-measure) + """ + return self.call("weightedTruePositiveRate") + + @property + def weightedFalsePositiveRate(self): + """ + Returns weighted false positive rate. + """ + return self.call("weightedFalsePositiveRate") + + @property + def weightedRecall(self): + """ + Returns weighted averaged recall. + (equals to precision, recall and f-measure) + """ + return self.call("weightedRecall") + + @property + def weightedPrecision(self): + """ + Returns weighted averaged precision. + """ + return self.call("weightedPrecision") + + def weightedFMeasure(self, beta=None): + """ + Returns weighted averaged f-measure. + """ + if beta is None: + return self.call("weightedFMeasure") + else: + return self.call("weightedFMeasure", beta) + + def _test(): import doctest from pyspark import SparkContext From d7a37bcaf123389fb0828eefb92659c6d9cb3460 Mon Sep 17 00:00:00 2001 From: Oleg Sidorkin <oleg.sidorkin@gmail.com> Date: Sun, 10 May 2015 01:31:34 -0700 Subject: [PATCH 070/320] [SPARK-7345][SQL] Spark cannot detect renamed columns using JDBC connector Issue appears when one tries to create DataFrame using sqlContext.load("jdbc"...) statement when "dbtable" contains query with renamed columns. If original column is used in SQL query once the resulting DataFrame will contain non-renamed column. If original column is used in SQL query several times with different aliases, sqlContext.load will fail. Original implementation of JDBCRDD.resolveTable uses getColumnName to detect column names in RDD schema. Suggested implementation uses getColumnLabel to handle column renames in SQL statement which is aware of SQL "AS" statement. 
Readings: http://stackoverflow.com/questions/4271152/getcolumnlabel-vs-getcolumnname http://stackoverflow.com/questions/12259829/jdbc-getcolumnname-getcolumnlabel-db2 Official documentation unfortunately a bit misleading in definition of "suggested title" purpose however clearly defines behavior of AS keyword in SQL statement. http://docs.oracle.com/javase/7/docs/api/java/sql/ResultSetMetaData.html getColumnLabel - Gets the designated column's suggested title for use in printouts and displays. The suggested title is usually specified by the SQL AS clause. If a SQL AS is not specified, the value returned from getColumnLabel will be the same as the value returned by the getColumnName method. Author: Oleg Sidorkin <oleg.sidorkin@gmail.com> Closes #6032 from osidorkin/master and squashes the following commits: 10fc44b [Oleg Sidorkin] [SPARK-7345][SQL] Regression test for JDBCSuite (resolved scala style test error) 2aaf6f7 [Oleg Sidorkin] [SPARK-7345][SQL] Regression test for JDBCSuite (renamed fields in JDBC query) b7d5b22 [Oleg Sidorkin] [SPARK-7345][SQL] Regression test for JDBCSuite 09559a0 [Oleg Sidorkin] [SPARK-7345][SQL] Spark cannot detect renamed columns using JDBC connector --- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 2 +- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 1a5083dbe0f61..a03ade3881f59 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -109,7 +109,7 @@ private[sql] object JDBCRDD extends Logging { val fields = new Array[StructField](ncols) var i = 0 while (i < ncols) { - val columnName = rsmd.getColumnName(i + 1) + val columnName = rsmd.getColumnLabel(i + 1) val dataType = rsmd.getColumnType(i + 1) val typeName = rsmd.getColumnTypeName(i + 1) val fieldSize = rsmd.getPrecision(i + 1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 021affafe36a6..2abfe7f167f77 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -204,6 +204,22 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { assert(ids(2) === 3) } + test("Register JDBC query with renamed fields") { + // Regression test for bug SPARK-7345 + sql( + s""" + |CREATE TEMPORARY TABLE renamed + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url', dbtable '(select NAME as NAME1, NAME as NAME2 from TEST.PEOPLE)', + |user 'testUser', password 'testPass') + """.stripMargin.replaceAll("\n", " ")) + + val df = sql("SELECT * FROM renamed") + assert(df.schema.fields.size == 2) + assert(df.schema.fields(0).name == "NAME1") + assert(df.schema.fields(1).name == "NAME2") + } + test("Basic API") { assert(TestSQLContext.jdbc(urlWithUserAndPass, "TEST.PEOPLE").collect().size === 3) } From 6bf9352fa5d740d01ffdafbbb23d9732752a8d87 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Sun, 10 May 2015 21:26:36 +0800 Subject: [PATCH 071/320] [MINOR] [SQL] Fixes variable name typo <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/6038) <!-- Reviewable:end --> Author: Cheng Lian 
<lian@databricks.com> Closes #6038 from liancheng/fix-typo and squashes the following commits: 572c2a4 [Cheng Lian] Fixes variable name typo --- .../apache/spark/sql/sources/CreateTableAsSelectSuite.scala | 2 +- .../scala/org/apache/spark/sql/sources/DDLTestSuite.scala | 2 +- .../scala/org/apache/spark/sql/sources/DataSourceTest.scala | 4 ++-- .../org/apache/spark/sql/sources/FilteredScanSuite.scala | 2 +- .../test/scala/org/apache/spark/sql/sources/InsertSuite.scala | 2 +- .../scala/org/apache/spark/sql/sources/PrunedScanSuite.scala | 2 +- .../scala/org/apache/spark/sql/sources/SaveLoadSuite.scala | 2 +- .../scala/org/apache/spark/sql/sources/TableScanSuite.scala | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 20a23b3bd6aa9..54f2f3cdec298 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.Utils class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ var path: File = null diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala index ca25751b9583d..6664e8d64c13a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala @@ -64,7 +64,7 @@ case class SimpleDDLScan(from: Int, to: Int, table: String)(@transient val sqlCo } class DDLTestSuite extends DataSourceTest { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ before { sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala index 9d3090c19b4e8..24ed665c67d2e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala @@ -24,7 +24,7 @@ import org.scalatest.BeforeAndAfter abstract class DataSourceTest extends QueryTest with BeforeAndAfter { // We want to test some edge cases. 
- implicit val caseInsensisitiveContext = new SQLContext(TestSQLContext.sparkContext) + implicit val caseInsensitiveContext = new SQLContext(TestSQLContext.sparkContext) - caseInsensisitiveContext.setConf(SQLConf.CASE_SENSITIVE, "false") + caseInsensitiveContext.setConf(SQLConf.CASE_SENSITIVE, "false") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala index cb5e5147ff189..cce747e7dbf64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala @@ -97,7 +97,7 @@ object FiltersPushed { class FilteredScanSuite extends DataSourceTest { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ before { sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 50629ea4dc066..d1d427e1790bd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.Utils class InsertSuite extends DataSourceTest with BeforeAndAfterAll { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ var path: File = null diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala index 6a1ddf2f8e98b..c2bc52e2120c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala @@ -52,7 +52,7 @@ case class SimplePrunedScan(from: Int, to: Int)(@transient val sqlContext: SQLCo } class PrunedScanSuite extends DataSourceTest { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ before { sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala index cb287ba85c1f8..6567d1acd7644 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.util.Utils class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ var originalDefaultSource: String = null diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala index 3b47b8adf313b..77af04a491742 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala @@ -88,7 +88,7 @@ case class AllDataTypesScan( } class TableScanSuite extends DataSourceTest { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ var tableWithSchemaExpected = (1 to 10).map { i => Row( From 3038443e58b9320c56f7785d9e36d4f85a563e6b Mon Sep 17 00:00:00 2001 From: "Joseph K. 
Bradley" <joseph@databricks.com> Date: Sun, 10 May 2015 13:29:27 -0700 Subject: [PATCH 072/320] [SPARK-7431] [ML] [PYTHON] Made CrossValidatorModel call parent init in PySpark Fixes bug with PySpark cvModel not having UID Also made small PySpark fixes: Evaluator should inherit from Params. MockModel should inherit from Model. CC: mengxr Author: Joseph K. Bradley <joseph@databricks.com> Closes #5968 from jkbradley/pyspark-cv-uid and squashes the following commits: 57f13cd [Joseph K. Bradley] Made CrossValidatorModel call parent init in PySpark --- python/pyspark/ml/pipeline.py | 2 +- python/pyspark/ml/tests.py | 4 ++-- python/pyspark/ml/tuning.py | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index c1b2077c985cf..fdbae06405f6a 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -179,7 +179,7 @@ def transform(self, dataset, params={}): return dataset -class Evaluator(object): +class Evaluator(Params): """ Base class for evaluators that compute metrics from predictions. """ diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 3a42bcf723894..75bb5d749ca87 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -34,7 +34,7 @@ from pyspark.sql import DataFrame from pyspark.ml.param import Param from pyspark.ml.param.shared import HasMaxIter, HasInputCol -from pyspark.ml.pipeline import Transformer, Estimator, Pipeline +from pyspark.ml.pipeline import Estimator, Model, Pipeline, Transformer class MockDataset(DataFrame): @@ -77,7 +77,7 @@ def fit(self, dataset, params={}): return model -class MockModel(MockTransformer, Transformer): +class MockModel(MockTransformer, Model): def __init__(self): super(MockModel, self).__init__() diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 28e3727f2c064..86f4dc7368be0 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -236,6 +236,7 @@ class CrossValidatorModel(Model): """ def __init__(self, bestModel): + super(CrossValidatorModel, self).__init__() #: best model from cross validation self.bestModel = bestModel From 8c07c75c9831d6c34f69fe840edb6470d4dfdfef Mon Sep 17 00:00:00 2001 From: "Kirill A. Korinskiy" <catap@catap.ru> Date: Sun, 10 May 2015 13:34:00 -0700 Subject: [PATCH 073/320] [SPARK-5521] PCA wrapper for easy transform vectors I implement a simple PCA wrapper for easy transform of vectors by PCA for example LabeledPoint or another complicated structure. 
Example of usage: ``` import org.apache.spark.mllib.regression.LinearRegressionWithSGD import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.feature.PCA val data = sc.textFile("data/mllib/ridge-data/lpsa.data").map { line => val parts = line.split(',') LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble))) }.cache() val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L) val training = splits(0).cache() val test = splits(1) val pca = PCA.create(training.first().features.size/2, data.map(_.features)) val training_pca = training.map(p => p.copy(features = pca.transform(p.features))) val test_pca = test.map(p => p.copy(features = pca.transform(p.features))) val numIterations = 100 val model = LinearRegressionWithSGD.train(training, numIterations) val model_pca = LinearRegressionWithSGD.train(training_pca, numIterations) val valuesAndPreds = test.map { point => val score = model.predict(point.features) (score, point.label) } val valuesAndPreds_pca = test_pca.map { point => val score = model_pca.predict(point.features) (score, point.label) } val MSE = valuesAndPreds.map{case(v, p) => math.pow((v - p), 2)}.mean() val MSE_pca = valuesAndPreds_pca.map{case(v, p) => math.pow((v - p), 2)}.mean() println("Mean Squared Error = " + MSE) println("PCA Mean Squared Error = " + MSE_pca) ``` Author: Kirill A. Korinskiy <catap@catap.ru> Author: Joseph K. Bradley <joseph@databricks.com> Closes #4304 from catap/pca and squashes the following commits: 501bcd9 [Joseph K. Bradley] Small updates: removed k from Java-friendly PCA fit(). In PCASuite, converted results to set for comparison. Added an error message for bad k in PCA. 9dcc02b [Kirill A. Korinskiy] [SPARK-5521] fix scala style 1892a06 [Kirill A. Korinskiy] [SPARK-5521] PCA wrapper for easy transform vectors --- docs/mllib-dimensionality-reduction.md | 19 +++- docs/mllib-feature-extraction.md | 55 ++++++++++- .../org/apache/spark/mllib/feature/PCA.scala | 93 +++++++++++++++++++ .../apache/spark/mllib/feature/PCASuite.scala | 48 ++++++++++ 4 files changed, 213 insertions(+), 2 deletions(-) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala diff --git a/docs/mllib-dimensionality-reduction.md b/docs/mllib-dimensionality-reduction.md index 870fed6cc5024..05f51168d837c 100644 --- a/docs/mllib-dimensionality-reduction.md +++ b/docs/mllib-dimensionality-reduction.md @@ -137,7 +137,7 @@ statistical method to find a rotation such that the first coordinate has the lar possible, and each succeeding coordinate in turn has the largest variance possible. The columns of the rotation matrix are called principal components. PCA is used widely in dimensionality reduction. -MLlib supports PCA for tall-and-skinny matrices stored in row-oriented format. +MLlib supports PCA for tall-and-skinny matrices stored in row-oriented format and any Vectors. <div class="codetabs"> <div data-lang="scala" markdown="1"> @@ -157,6 +157,23 @@ val pc: Matrix = mat.computePrincipalComponents(10) // Principal components are // Project the rows to the linear space spanned by the top 10 principal components. 
val projected: RowMatrix = mat.multiply(pc) {% endhighlight %} + +The following code demonstrates how to compute principal components on source vectors +and use them to project the vectors into a low-dimensional space while keeping associated labels: + +{% highlight scala %} +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.feature.PCA + +val data: RDD[LabeledPoint] = ... + +// Compute the top 10 principal components. +val pca = new PCA(10).fit(data.map(_.features)) + +// Project vectors to the linear space spanned by the top 10 principal components, keeping the label +val projected = data.map(p => p.copy(features = pca.transform(p.features))) +{% endhighlight %} + </div> <div data-lang="java" markdown="1"> diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md index 03fedd01016b9..f723cd6b9dfab 100644 --- a/docs/mllib-feature-extraction.md +++ b/docs/mllib-feature-extraction.md @@ -507,7 +507,6 @@ v_N This example below demonstrates how to load a simple vectors file, extract a set of vectors, then transform those vectors using a transforming vector value. - <div class="codetabs"> <div data-lang="scala"> {% highlight scala %} @@ -531,3 +530,57 @@ val transformedData2 = parsedData.map(x => transformer.transform(x)) </div> +## PCA + +A feature transformer that projects vectors to a low-dimensional space using PCA. +Details you can read at [dimensionality reduction](mllib-dimensionality-reduction.html). + +### Example + +The following code demonstrates how to compute principal components on a `Vector` +and use them to project the vectors into a low-dimensional space while keeping associated labels +for calculation a [Linear Regression]((mllib-linear-methods.html)) + +<div class="codetabs"> +<div data-lang="scala"> +{% highlight scala %} +import org.apache.spark.mllib.regression.LinearRegressionWithSGD +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.feature.PCA + +val data = sc.textFile("data/mllib/ridge-data/lpsa.data").map { line => + val parts = line.split(',') + LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble))) +}.cache() + +val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L) +val training = splits(0).cache() +val test = splits(1) + +val pca = new PCA(training.first().features.size/2).fit(data.map(_.features)) +val training_pca = training.map(p => p.copy(features = pca.transform(p.features))) +val test_pca = test.map(p => p.copy(features = pca.transform(p.features))) + +val numIterations = 100 +val model = LinearRegressionWithSGD.train(training, numIterations) +val model_pca = LinearRegressionWithSGD.train(training_pca, numIterations) + +val valuesAndPreds = test.map { point => + val score = model.predict(point.features) + (score, point.label) +} + +val valuesAndPreds_pca = test_pca.map { point => + val score = model_pca.predict(point.features) + (score, point.label) +} + +val MSE = valuesAndPreds.map{case(v, p) => math.pow((v - p), 2)}.mean() +val MSE_pca = valuesAndPreds_pca.map{case(v, p) => math.pow((v - p), 2)}.mean() + +println("Mean Squared Error = " + MSE) +println("PCA Mean Squared Error = " + MSE_pca) +{% endhighlight %} +</div> +</div> diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala new file mode 100644 index 0000000000000..4e01e402b4283 --- /dev/null +++ 
b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.feature + +import org.apache.spark.api.java.JavaRDD +import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.linalg.distributed.RowMatrix +import org.apache.spark.rdd.RDD + +/** + * A feature transformer that projects vectors to a low-dimensional space using PCA. + * + * @param k number of principal components + */ +class PCA(val k: Int) { + require(k >= 1, s"PCA requires a number of principal components k >= 1 but was given $k") + + /** + * Computes a [[PCAModel]] that contains the principal components of the input vectors. + * + * @param sources source vectors + */ + def fit(sources: RDD[Vector]): PCAModel = { + require(k <= sources.first().size, + s"source vector size is ${sources.first().size} must be greater than k=$k") + + val mat = new RowMatrix(sources) + val pc = mat.computePrincipalComponents(k) match { + case dm: DenseMatrix => + dm + case sm: SparseMatrix => + /* Convert a sparse matrix to dense. + * + * RowMatrix.computePrincipalComponents always returns a dense matrix. + * The following code is a safeguard. + */ + sm.toDense + case m => + throw new IllegalArgumentException("Unsupported matrix format. Expected " + + s"SparseMatrix or DenseMatrix. Instead got: ${m.getClass}") + + } + new PCAModel(k, pc) + } + + /** Java-friendly version of [[fit()]] */ + def fit(sources: JavaRDD[Vector]): PCAModel = fit(sources.rdd) +} + +/** + * Model fitted by [[PCA]] that can project vectors to a low-dimensional space using PCA. + * + * @param k number of principal components. + * @param pc a principal components Matrix. Each column is one principal component. + */ +class PCAModel private[mllib] (val k: Int, val pc: DenseMatrix) extends VectorTransformer { + /** + * Transform a vector by computed Principal Components. + * + * @param vector vector to be transformed. + * Vector must be the same length as the source vectors given to [[PCA.fit()]]. + * @return transformed vector. Vector will be of length k. + */ + override def transform(vector: Vector): Vector = { + vector match { + case dv: DenseVector => + pc.transpose.multiply(dv) + case SparseVector(size, indices, values) => + /* SparseVector -> single row SparseMatrix */ + val sm = Matrices.sparse(size, 1, Array(0, indices.length), indices, values).transpose + val projection = sm.multiply(pc) + Vectors.dense(projection.values) + case _ => + throw new IllegalArgumentException("Unsupported vector format. Expected " + + s"SparseVector or DenseVector. 
Instead got: ${vector.getClass}") + } + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala new file mode 100644 index 0000000000000..758af588f1c69 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.feature + +import org.scalatest.FunSuite + +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.linalg.distributed.RowMatrix +import org.apache.spark.mllib.util.MLlibTestSparkContext + +class PCASuite extends FunSuite with MLlibTestSparkContext { + + private val data = Array( + Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))), + Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0), + Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0) + ) + + private lazy val dataRDD = sc.parallelize(data, 2) + + test("Correct computing use a PCA wrapper") { + val k = dataRDD.count().toInt + val pca = new PCA(k).fit(dataRDD) + + val mat = new RowMatrix(dataRDD) + val pc = mat.computePrincipalComponents(k) + + val pca_transform = pca.transform(dataRDD).collect() + val mat_multiply = mat.multiply(pc).rows.collect() + + assert(pca_transform.toSet === mat_multiply.toSet) + } +} From c5aca0c27be31e94ffdb01ef2eb29d3b373d7f4c Mon Sep 17 00:00:00 2001 From: Glenn Weidner <gweidner@us.ibm.com> Date: Sun, 10 May 2015 19:18:32 -0700 Subject: [PATCH 074/320] [SPARK-7427] [PYSPARK] Make sharedParams match in Scala, Python Modified 2 files: python/pyspark/ml/param/_shared_params_code_gen.py python/pyspark/ml/param/shared.py Generated shared.py on Linux using Python 2.6.6 on Redhat Enterprise Linux Server 6.6. python _shared_params_code_gen.py > shared.py Only changed maxIter, regParam, rawPredictionCol based on strings from SharedParamsCodeGen.scala. Note warning was displayed when committing shared.py: warning: LF will be replaced by CRLF in python/pyspark/ml/param/shared.py. 
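The shape of the shared-parameter table is easier to see outside the generators. The standalone Scala sketch below is a hypothetical stand-in (the ParamDesc case class, defaults, and printed output are illustrative, not the actual SharedParamsCodeGen contents); it only shows how a single list of (name, doc, default) entries, with the doc strings updated in this patch, drives the generated Has* mixins on both the Scala and Python sides.

object SharedParamDocsSketch {
  // Hypothetical stand-in for one row of the code-gen table: each (name, doc, default)
  // entry produces a Has<Name> mixin, so keeping Scala and Python in sync amounts to
  // keeping these doc strings identical on both sides.
  case class ParamDesc(name: String, doc: String, default: Option[String] = None)

  val shared = Seq(
    ParamDesc("maxIter", "max number of iterations (>= 0)"),
    ParamDesc("regParam", "regularization parameter (>= 0)"),
    ParamDesc("rawPredictionCol", "raw prediction (a.k.a. confidence) column name",
      Some("'rawPrediction'"))
  )

  def main(args: Array[String]): Unit = {
    // Print the mixin name, doc string, and default that a generator would emit.
    shared.foreach { p =>
      println(s"Has${p.name.capitalize}: ${p.doc} (default: ${p.default.getOrElse("none")})")
    }
  }
}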
Author: Glenn Weidner <gweidner@us.ibm.com> Closes #6023 from gweidner/br-7427 and squashes the following commits: db72e32 [Glenn Weidner] [SPARK-7427] [PySpark] Make sharedParams match in Scala, Python 825e4a9 [Glenn Weidner] [SPARK-7427] [PySpark] Make sharedParams match in Scala, Python e6a865e [Glenn Weidner] [SPARK-7427] [PySpark] Make sharedParams match in Scala, Python 1eee702 [Glenn Weidner] Merge remote-tracking branch 'upstream/master' 1ac10e5 [Glenn Weidner] Merge remote-tracking branch 'upstream/master' cafd104 [Glenn Weidner] Merge remote-tracking branch 'upstream/master' 9bea1eb [Glenn Weidner] Merge remote-tracking branch 'upstream/master' 4a35c20 [Glenn Weidner] Merge remote-tracking branch 'upstream/master' 9790cbe [Glenn Weidner] Merge remote-tracking branch 'upstream/master' d9c30f4 [Glenn Weidner] [SPARK-7275] [SQL] [WIP] Make LogicalRelation public --- .../ml/param/_shared_params_code_gen.py | 6 ++-- python/pyspark/ml/param/shared.py | 30 +++++++++---------- python/pyspark/ml/tests.py | 4 +-- tox.ini | 2 +- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index ed3171b6976d3..3be0979b92013 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -88,12 +88,12 @@ def get$Name(self): print("\n# DO NOT MODIFY THIS FILE! It was generated by _shared_params_code_gen.py.\n") print("from pyspark.ml.param import Param, Params\n\n") shared = [ - ("maxIter", "max number of iterations", None), - ("regParam", "regularization constant", None), + ("maxIter", "max number of iterations (>= 0)", None), + ("regParam", "regularization parameter (>= 0)", None), ("featuresCol", "features column name", "'features'"), ("labelCol", "label column name", "'label'"), ("predictionCol", "prediction column name", "'prediction'"), - ("rawPredictionCol", "raw prediction column name", "'rawPrediction'"), + ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", "'rawPrediction'"), ("inputCol", "input column name", None), ("inputCols", "input column names", None), ("outputCol", "output column name", None), diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index d0bcadee22347..4b22322b895b4 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -22,16 +22,16 @@ class HasMaxIter(Params): """ - Mixin for param maxIter: max number of iterations. + Mixin for param maxIter: max number of iterations (>= 0). """ # a placeholder to make it appear in the generated doc - maxIter = Param(Params._dummy(), "maxIter", "max number of iterations") + maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0)") def __init__(self): super(HasMaxIter, self).__init__() - #: param for max number of iterations - self.maxIter = Param(self, "maxIter", "max number of iterations") + #: param for max number of iterations (>= 0) + self.maxIter = Param(self, "maxIter", "max number of iterations (>= 0)") if None is not None: self._setDefault(maxIter=None) @@ -51,16 +51,16 @@ def getMaxIter(self): class HasRegParam(Params): """ - Mixin for param regParam: regularization constant. + Mixin for param regParam: regularization parameter (>= 0). 
""" # a placeholder to make it appear in the generated doc - regParam = Param(Params._dummy(), "regParam", "regularization constant") + regParam = Param(Params._dummy(), "regParam", "regularization parameter (>= 0)") def __init__(self): super(HasRegParam, self).__init__() - #: param for regularization constant - self.regParam = Param(self, "regParam", "regularization constant") + #: param for regularization parameter (>= 0) + self.regParam = Param(self, "regParam", "regularization parameter (>= 0)") if None is not None: self._setDefault(regParam=None) @@ -167,16 +167,16 @@ def getPredictionCol(self): class HasRawPredictionCol(Params): """ - Mixin for param rawPredictionCol: raw prediction column name. + Mixin for param rawPredictionCol: raw prediction (a.k.a. confidence) column name. """ # a placeholder to make it appear in the generated doc - rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction column name") + rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction (a.k.a. confidence) column name") def __init__(self): super(HasRawPredictionCol, self).__init__() - #: param for raw prediction column name - self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction column name") + #: param for raw prediction (a.k.a. confidence) column name + self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name") if 'rawPrediction' is not None: self._setDefault(rawPredictionCol='rawPrediction') @@ -403,14 +403,12 @@ class HasStepSize(Params): """ # a placeholder to make it appear in the generated doc - stepSize = Param(Params._dummy(), "stepSize", - "Step size to be used for each iteration of optimization.") + stepSize = Param(Params._dummy(), "stepSize", "Step size to be used for each iteration of optimization.") def __init__(self): super(HasStepSize, self).__init__() #: param for Step size to be used for each iteration of optimization. 
- self.stepSize = Param(self, "stepSize", - "Step size to be used for each iteration of optimization.") + self.stepSize = Param(self, "stepSize", "Step size to be used for each iteration of optimization.") if None is not None: self._setDefault(stepSize=None) diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 75bb5d749ca87..ba6478dcd58a9 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -128,7 +128,7 @@ def test_param(self): testParams = TestParams() maxIter = testParams.maxIter self.assertEqual(maxIter.name, "maxIter") - self.assertEqual(maxIter.doc, "max number of iterations") + self.assertEqual(maxIter.doc, "max number of iterations (>= 0)") self.assertTrue(maxIter.parent is testParams) def test_params(self): @@ -156,7 +156,7 @@ def test_params(self): self.assertEquals( testParams.explainParams(), "\n".join(["inputCol: input column name (undefined)", - "maxIter: max number of iterations (default: 10, current: 100)"])) + "maxIter: max number of iterations (>= 0) (default: 10, current: 100)"])) if __name__ == "__main__": diff --git a/tox.ini b/tox.ini index b568029a204cc..76e3f42cde62d 100644 --- a/tox.ini +++ b/tox.ini @@ -15,4 +15,4 @@ [pep8] max-line-length=100 -exclude=cloudpickle.py,heapq3.py +exclude=cloudpickle.py,heapq3.py,shared.py From 0835f1edd4c9c05439df85c248faf6787d45f7b7 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Date: Sun, 10 May 2015 19:49:42 -0700 Subject: [PATCH 075/320] [SPARK-7512] [SPARKR] Fix RDD's show method to use getJRDD Since the RDD object might be a Pipelined RDD we should use `getJRDD` to get the right handle to the Java object. Fixes the bug reported at http://stackoverflow.com/questions/30057702/sparkr-filterrdd-and-flatmap-not-working cc concretevitamin Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Closes #6035 from shivaram/sparkr-show-bug and squashes the following commits: d70145c [Shivaram Venkataraman] Fix RDD's show method to use getJRDD Fixes the bug reported at http://stackoverflow.com/questions/30057702/sparkr-filterrdd-and-flatmap-not-working --- R/pkg/R/RDD.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 73999a6737032..9138629cac9c0 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -67,8 +67,8 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode, }) setMethod("show", "RDD", - function(.Object) { - cat(paste(callJMethod(.Object@jrdd, "toString"), "\n", sep="")) + function(object) { + cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep="")) }) setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) { From 2242ab31e99227a102b0918d73db67e99899fd24 Mon Sep 17 00:00:00 2001 From: tianyi <tianyi.asiainfo@gmail.com> Date: Mon, 11 May 2015 14:08:15 +0800 Subject: [PATCH 076/320] [SPARK-7519] [SQL] fix minor bugs in thrift server UI Bugs description: 1. There are extra commas on the top of session list. 2. The format of time in "Start at:" part is not the same as others. 3. The total number of online sessions is wrong. 
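The third bug is the least obvious, so here is a self-contained Scala sketch of the bookkeeping the fix introduces. It is not the actual listener class (only the field and callback names follow the diff below), but it shows why sessionList.size over-counts: closed sessions are retained for the history table, so live sessions need their own counter.

import scala.collection.mutable

object SessionCountSketch {
  case class SessionInfo(sessionId: String, var finishTimestamp: Long = 0L)

  // Retained for the history table, including finished sessions.
  private val sessionList = new mutable.LinkedHashMap[String, SessionInfo]
  // Live sessions only -- this is what the page header should report.
  private var onlineSessionNum = 0

  def onSessionCreated(sessionId: String): Unit = {
    sessionList.put(sessionId, SessionInfo(sessionId))
    onlineSessionNum += 1
  }

  def onSessionClosed(sessionId: String): Unit = {
    sessionList(sessionId).finishTimestamp = System.currentTimeMillis
    onlineSessionNum -= 1 // the entry stays in sessionList for history
  }

  def main(args: Array[String]): Unit = {
    onSessionCreated("s1"); onSessionCreated("s2"); onSessionClosed("s1")
    // sessionList.size is 2 (history), but only 1 session is actually online.
    println(s"retained=${sessionList.size}, online=$onlineSessionNum")
  }
}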
Author: tianyi <tianyi.asiainfo@gmail.com> Closes #6048 from tianyi/SPARK-7519 and squashes the following commits: ed366b7 [tianyi] fix bug --- .../sql/hive/thriftserver/HiveThriftServer2.scala | 4 +++- .../sql/hive/thriftserver/ui/ThriftServerPage.scala | 10 +++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala index 0be5a92c2546c..3458b04bfba0f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala @@ -147,7 +147,7 @@ object HiveThriftServer2 extends Logging { override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { server.stop() } - + var onlineSessionNum: Int = 0 val sessionList = new mutable.LinkedHashMap[String, SessionInfo] val executionList = new mutable.LinkedHashMap[String, ExecutionInfo] val retainedStatements = @@ -170,11 +170,13 @@ object HiveThriftServer2 extends Logging { def onSessionCreated(ip: String, sessionId: String, userName: String = "UNKNOWN"): Unit = { val info = new SessionInfo(sessionId, System.currentTimeMillis, ip, userName) sessionList.put(sessionId, info) + onlineSessionNum += 1 trimSessionIfNecessary() } def onSessionClosed(sessionId: String): Unit = { sessionList(sessionId).finishTimestamp = System.currentTimeMillis + onlineSessionNum -= 1 } def onStatementStart( diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 71b16b6bebffb..6a2be4a58e5cb 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -29,7 +29,7 @@ import org.apache.spark.ui.UIUtils._ import org.apache.spark.ui._ -/** Page for Spark Web UI that shows statistics of a streaming job */ +/** Page for Spark Web UI that shows statistics of a thrift server */ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("") with Logging { private val listener = parent.listener @@ -42,7 +42,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" generateBasicStats() ++ <br/> ++ <h4> - {listener.sessionList.size} session(s) are online, + {listener.onlineSessionNum} session(s) are online, running {listener.totalRunning} SQL statement(s) </h4> ++ generateSessionStatsTable() ++ @@ -50,12 +50,12 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" UIUtils.headerSparkPage("ThriftServer", content, parent, Some(5000)) } - /** Generate basic stats of the streaming program */ + /** Generate basic stats of the thrift server program */ private def generateBasicStats(): Seq[Node] = { val timeSinceStart = System.currentTimeMillis() - startTime.getTime <ul class ="unstyled"> <li> - <strong>Started at: </strong> {startTime.toString} + <strong>Started at: </strong> {formatDate(startTime)} </li> <li> <strong>Time since start: </strong>{formatDurationVerbose(timeSinceStart)} @@ -148,7 +148,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" <tr> <td> {session.userName} 
</td> <td> {session.ip} </td> - <td> <a href={sessionLink}> {session.sessionId} </a> </td>, + <td> <a href={sessionLink}> {session.sessionId} </a> </td> <td> {formatDate(session.startTimestamp)} </td> <td> {if(session.finishTimestamp > 0) formatDate(session.finishTimestamp)} </td> <td> {formatDurationOption(Some(session.totalTime))} </td> From d70a076892e0677acceccaba665908cdf664f1b4 Mon Sep 17 00:00:00 2001 From: Wesley Miao <wesley.miao@gmail.com> Date: Mon, 11 May 2015 12:20:06 +0100 Subject: [PATCH 077/320] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doesn't work all the time tdas https://issues.apache.org/jira/browse/SPARK-7326 The problem most likely resides in DStream.slice() implementation, as shown below. def slice(fromTime: Time, toTime: Time): Seq[RDD[T]] = { if (!isInitialized) { throw new SparkException(this + " has not been initialized") } if (!(fromTime - zeroTime).isMultipleOf(slideDuration)) { logWarning("fromTime (" + fromTime + ") is not a multiple of slideDuration (" + slideDuration + ")") } if (!(toTime - zeroTime).isMultipleOf(slideDuration)) { logWarning("toTime (" + fromTime + ") is not a multiple of slideDuration (" + slideDuration + ")") } val alignedToTime = toTime.floor(slideDuration, zeroTime) val alignedFromTime = fromTime.floor(slideDuration, zeroTime) logInfo("Slicing from " + fromTime + " to " + toTime + " (aligned to " + alignedFromTime + " and " + alignedToTime + ")") alignedFromTime.to(alignedToTime, slideDuration).flatMap(time => { if (time >= zeroTime) getOrCompute(time) else None }) } Here after performing floor() on both fromTime and toTime, the result (alignedFromTime - zeroTime) and (alignedToTime - zeroTime) may no longer be multiple of the slidingDuration, thus making isTimeValid() check failed for all the remaining computation. The fix is to add a new floor() function in Time.scala to respect the zeroTime while performing the floor : def floor(that: Duration, zeroTime: Time): Time = { val t = that.milliseconds new Time(((this.millis - zeroTime.milliseconds) / t) * t + zeroTime.milliseconds) } And then change the DStream.slice to call this new floor function by passing in its zeroTime. val alignedToTime = toTime.floor(slideDuration, zeroTime) val alignedFromTime = fromTime.floor(slideDuration, zeroTime) This way the alignedToTime and alignedFromTime are *really* aligned in respect to zeroTime whose value is not really a 0. 
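To make the arithmetic concrete, the following standalone Scala sketch (not the Spark Time/Duration classes) reproduces the example used in the new TimeSuite assertions below: with zeroTime = 50 ms and a 200 ms slide duration, the old floor of 1350 ms gives 1200 ms, which is not aligned relative to zeroTime, while the zero-aware floor gives 1250 ms, which is.

object FloorAlignmentSketch {
  // Floor that ignores the zero time (the old behavior).
  def naiveFloor(millis: Long, slide: Long): Long = (millis / slide) * slide

  // Floor relative to zeroTime, so (result - zeroTime) stays a multiple of slide.
  def zeroAwareFloor(millis: Long, slide: Long, zeroTime: Long): Long =
    ((millis - zeroTime) / slide) * slide + zeroTime

  def main(args: Array[String]): Unit = {
    val zeroTime = 50L; val slide = 200L; val toTime = 1350L

    val naive = naiveFloor(toTime, slide)                 // 1200
    val aligned = zeroAwareFloor(toTime, slide, zeroTime) // 1250

    // 1200 - 50 = 1150 is not a multiple of 200, so downstream validity checks fail.
    println(s"naive floor: $naive, aligned to zeroTime: ${(naive - zeroTime) % slide == 0}")
    // 1250 - 50 = 1200 is a multiple of 200, as the new TimeSuite assertion expects.
    println(s"zero-aware floor: $aligned, aligned to zeroTime: ${(aligned - zeroTime) % slide == 0}")
  }
}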
Author: Wesley Miao <wesley.miao@gmail.com> Author: Wesley <wesley.miao@autodesk.com> Closes #5871 from wesleymiao/spark-7326 and squashes the following commits: 82a4d8c [Wesley Miao] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream dosen't work all the time 48b4dc0 [Wesley] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doesn't work all the time 6ade399 [Wesley] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doesn't work all the time 2611745 [Wesley Miao] [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doesn't work all the time --- .../org/apache/spark/streaming/Time.scala | 5 +++++ .../spark/streaming/dstream/DStream.scala | 22 ++++++++++++------- .../apache/spark/streaming/TimeSuite.scala | 3 +++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Time.scala b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala index 42c49678d24f0..92cfd7d40338c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Time.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala @@ -63,6 +63,11 @@ case class Time(private val millis: Long) { new Time((this.millis / t) * t) } + def floor(that: Duration, zeroTime: Time): Time = { + val t = that.milliseconds + new Time(((this.millis - zeroTime.milliseconds) / t) * t + zeroTime.milliseconds) + } + def isMultipleOf(that: Duration): Boolean = (this.millis % that.milliseconds == 0) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index f1f8a70655996..7092a3d3f0b86 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -763,16 +763,22 @@ abstract class DStream[T: ClassTag] ( if (!isInitialized) { throw new SparkException(this + " has not been initialized") } - if (!(fromTime - zeroTime).isMultipleOf(slideDuration)) { - logWarning("fromTime (" + fromTime + ") is not a multiple of slideDuration (" - + slideDuration + ")") + + val alignedToTime = if ((toTime - zeroTime).isMultipleOf(slideDuration)) { + toTime + } else { + logWarning("toTime (" + toTime + ") is not a multiple of slideDuration (" + + slideDuration + ")") + toTime.floor(slideDuration, zeroTime) } - if (!(toTime - zeroTime).isMultipleOf(slideDuration)) { - logWarning("toTime (" + fromTime + ") is not a multiple of slideDuration (" - + slideDuration + ")") + + val alignedFromTime = if ((fromTime - zeroTime).isMultipleOf(slideDuration)) { + fromTime + } else { + logWarning("fromTime (" + fromTime + ") is not a multiple of slideDuration (" + + slideDuration + ")") + fromTime.floor(slideDuration, zeroTime) } - val alignedToTime = toTime.floor(slideDuration) - val alignedFromTime = fromTime.floor(slideDuration) logInfo("Slicing from " + fromTime + " to " + toTime + " (aligned to " + alignedFromTime + " and " + alignedToTime + ")") diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala index 5579ac364346c..e6a01656f479d 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala @@ -69,6 +69,9 @@ class TimeSuite extends TestSuiteBase { assert(new Time(1200).floor(new Duration(200)) == new Time(1200)) assert(new 
Time(199).floor(new Duration(200)) == new Time(0)) assert(new Time(1).floor(new Duration(1)) == new Time(1)) + assert(new Time(1350).floor(new Duration(200), new Time(50)) == new Time(1250)) + assert(new Time(1350).floor(new Duration(200), new Time(150)) == new Time(1350)) + assert(new Time(1350).floor(new Duration(200), new Time(200)) == new Time(1200)) } test("isMultipleOf") { From 042dda3c5c25b5ecb6ae4fd37c85b211b01c187b Mon Sep 17 00:00:00 2001 From: Yanbo Liang <ybliang8@gmail.com> Date: Mon, 11 May 2015 09:14:20 -0700 Subject: [PATCH 078/320] [SPARK-6092] [MLLIB] Add RankingMetrics in PySpark/MLlib Author: Yanbo Liang <ybliang8@gmail.com> Closes #6044 from yanboliang/spark-6092 and squashes the following commits: 726a9b1 [Yanbo Liang] add newRankingMetrics 33f649c [Yanbo Liang] Add RankingMetrics in PySpark/MLlib --- .../mllib/api/python/PythonMLLibAPI.scala | 10 +++ python/pyspark/mllib/evaluation.py | 78 ++++++++++++++++++- 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 8c30ad4b391ae..f4c477596557f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -32,6 +32,7 @@ import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.python.SerDeUtil import org.apache.spark.mllib.classification._ import org.apache.spark.mllib.clustering._ +import org.apache.spark.mllib.evaluation.RankingMetrics import org.apache.spark.mllib.feature._ import org.apache.spark.mllib.fpm.{FPGrowth, FPGrowthModel} import org.apache.spark.mllib.linalg._ @@ -50,6 +51,7 @@ import org.apache.spark.mllib.tree.model.{DecisionTreeModel, GradientBoostedTree import org.apache.spark.mllib.tree.{DecisionTree, GradientBoostedTrees, RandomForest} import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils @@ -923,6 +925,14 @@ private[python] class PythonMLLibAPI extends Serializable { RG.gammaVectorRDD(jsc.sc, shape, scale, numRows, numCols, parts, s) } + /** + * Java stub for the constructor of Python mllib RankingMetrics + */ + def newRankingMetrics(predictionAndLabels: DataFrame): RankingMetrics[Any] = { + new RankingMetrics(predictionAndLabels.map( + r => (r.getSeq(0).toArray[Any], r.getSeq(1).toArray[Any]))) + } + } diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 36914597de228..4c777f2180dc9 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -15,9 +15,12 @@ # limitations under the License. # -from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc from pyspark.sql import SQLContext -from pyspark.sql.types import StructField, StructType, DoubleType +from pyspark.sql.types import StructField, StructType, DoubleType, IntegerType, ArrayType + +__all__ = ['BinaryClassificationMetrics', 'RegressionMetrics', + 'MulticlassMetrics', 'RankingMetrics'] class BinaryClassificationMetrics(JavaModelWrapper): @@ -270,6 +273,77 @@ def weightedFMeasure(self, beta=None): return self.call("weightedFMeasure", beta) +class RankingMetrics(JavaModelWrapper): + """ + Evaluator for ranking algorithms. + + >>> predictionAndLabels = sc.parallelize([ + ... 
([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]), + ... ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]), + ... ([1, 2, 3, 4, 5], [])]) + >>> metrics = RankingMetrics(predictionAndLabels) + >>> metrics.precisionAt(1) + 0.33... + >>> metrics.precisionAt(5) + 0.26... + >>> metrics.precisionAt(15) + 0.17... + >>> metrics.meanAveragePrecision + 0.35... + >>> metrics.ndcgAt(3) + 0.33... + >>> metrics.ndcgAt(10) + 0.48... + + """ + + def __init__(self, predictionAndLabels): + """ + :param predictionAndLabels: an RDD of (predicted ranking, ground truth set) pairs. + """ + sc = predictionAndLabels.ctx + sql_ctx = SQLContext(sc) + df = sql_ctx.createDataFrame(predictionAndLabels, + schema=sql_ctx._inferSchema(predictionAndLabels)) + java_model = callMLlibFunc("newRankingMetrics", df._jdf) + super(RankingMetrics, self).__init__(java_model) + + def precisionAt(self, k): + """ + Compute the average precision of all the queries, truncated at ranking position k. + + If for a query, the ranking algorithm returns n (n < k) results, the precision value + will be computed as #(relevant items retrieved) / k. This formula also applies when + the size of the ground truth set is less than k. + + If a query has an empty ground truth set, zero will be used as precision together + with a log warning. + """ + return self.call("precisionAt", int(k)) + + @property + def meanAveragePrecision(self): + """ + Returns the mean average precision (MAP) of all the queries. + If a query has an empty ground truth set, the average precision will be zero and + a log warining is generated. + """ + return self.call("meanAveragePrecision") + + def ndcgAt(self, k): + """ + Compute the average NDCG value of all the queries, truncated at ranking position k. + The discounted cumulative gain at position k is computed as: + sum,,i=1,,^k^ (2^{relevance of ''i''th item}^ - 1) / log(i + 1), + and the NDCG is obtained by dividing the DCG value on the ground truth set. + In the current implementation, the relevance value is binary. + + If a query has an empty ground truth set, zero will be used as ndcg together with + a log warning. + """ + return self.call("ndcgAt", int(k)) + + def _test(): import doctest from pyspark import SparkContext From 4f8a15519267ac205424270155254382cc2d3690 Mon Sep 17 00:00:00 2001 From: Bryan Cutler <bjcutler@us.ibm.com> Date: Mon, 11 May 2015 09:23:47 -0700 Subject: [PATCH 079/320] [SPARK-7522] [EXAMPLES] Removed angle brackets from dataFormat option As is, to specify this option on command line, you have to escape the angle brackets. 
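A minimal sketch of the before/after usage, built from the same scopt calls that appear in the diffs below (the Params case class and the sample input path are made up for illustration): once the option is registered as plain dataFormat, it is passed as --dataFormat libsvm, whereas the old <dataFormat> name forced users to escape or quote the angle brackets on the command line.

import scopt.OptionParser

object DataFormatOptionSketch {
  case class Params(dataFormat: String = "libsvm", input: String = "")

  def main(args: Array[String]): Unit = {
    val parser = new OptionParser[Params]("DecisionTreeExample") {
      opt[String]("dataFormat") // previously registered as "<dataFormat>"
        .text("data format: libsvm (default), dense")
        .action((x, c) => c.copy(dataFormat = x))
      arg[String]("<input>")
        .text("input path to a dataset")
        .action((x, c) => c.copy(input = x))
    }
    // Now invoked as:  --dataFormat libsvm data/mllib/sample_libsvm_data.txt
    // instead of:      '--<dataFormat> libsvm ...' with the brackets escaped for the shell.
    parser.parse(args, Params()).foreach(println)
  }
}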
Author: Bryan Cutler <bjcutler@us.ibm.com> Closes #6049 from BryanCutler/dataFormat-option-7522 and squashes the following commits: b34afb4 [Bryan Cutler] [SPARK-7522] Removed angle brackets from dataFormat option --- .../org/apache/spark/examples/ml/DecisionTreeExample.scala | 2 +- .../main/scala/org/apache/spark/examples/ml/GBTExample.scala | 2 +- .../org/apache/spark/examples/ml/RandomForestExample.scala | 2 +- .../org/apache/spark/examples/mllib/DecisionTreeRunner.scala | 2 +- .../spark/examples/mllib/GradientBoostedTreesRunner.scala | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index 8340d91101ab3..54e4073941056 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala @@ -112,7 +112,7 @@ object DecisionTreeExample { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("<dataFormat>") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("<input>") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala index c5899b6683c79..33905277c7341 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala @@ -111,7 +111,7 @@ object GBTExample { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("<dataFormat>") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("<input>") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala index 7f88d2681bcaa..9f7cad68a4594 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala @@ -117,7 +117,7 @@ object RandomForestExample { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("<dataFormat>") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("<input>") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index 262fd2c9611d0..b0613632c9946 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -126,7 +126,7 @@ object DecisionTreeRunner { .text(s"input path to test dataset. If given, option fracTest is ignored." 
+ s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("<dataFormat>") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("<input>") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala index 0763a7736305a..7416fb5a40848 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala @@ -69,7 +69,7 @@ object GradientBoostedTreesRunner { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("<dataFormat>") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("<input>") From 1b46556999ca126cb593ef052d24afcb75383223 Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Mon, 11 May 2015 10:58:56 -0700 Subject: [PATCH 080/320] [SPARK-7361] [STREAMING] Throw unambiguous exception when attempting to start multiple StreamingContexts in the same JVM Currently attempt to start a streamingContext while another one is started throws a confusing exception that the action name JobScheduler is already registered. Instead its best to throw a proper exception as it is not supported. Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #5907 from tdas/SPARK-7361 and squashes the following commits: fb81c4a [Tathagata Das] Fix typo a9cd5bb [Tathagata Das] Added startSite to StreamingContext 5fdfc0d [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7361 5870e2b [Tathagata Das] Added check for multiple streaming contexts --- .../spark/streaming/StreamingContext.scala | 48 +++++++++++++++---- .../streaming/StreamingContextSuite.scala | 18 +++++++ 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index bbdb4e8af036c..5abe1367752d9 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -18,7 +18,7 @@ package org.apache.spark.streaming import java.io.InputStream -import java.util.concurrent.atomic.AtomicInteger +import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} import scala.collection.Map import scala.collection.mutable.Queue @@ -28,8 +28,9 @@ import akka.actor.{Props, SupervisorStrategy} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.{BytesWritable, LongWritable, Text} -import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.hadoop.mapreduce.lib.input.TextInputFormat +import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} + import org.apache.spark._ import org.apache.spark.annotation.Experimental import org.apache.spark.input.FixedLengthBinaryInputFormat @@ -37,8 +38,9 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.dstream._ import 
org.apache.spark.streaming.receiver.{ActorReceiver, ActorSupervisorStrategy, Receiver} -import org.apache.spark.streaming.scheduler._ +import org.apache.spark.streaming.scheduler.{JobScheduler, StreamingListener} import org.apache.spark.streaming.ui.{StreamingJobProgressListener, StreamingTab} +import org.apache.spark.util.CallSite /** * Main entry point for Spark Streaming functionality. It provides methods used to create @@ -202,6 +204,8 @@ class StreamingContext private[streaming] ( import StreamingContextState._ private[streaming] var state = Initialized + private val startSite = new AtomicReference[CallSite](null) + /** * Return the associated Spark context */ @@ -518,6 +522,7 @@ class StreamingContext private[streaming] ( * @throws SparkException if the context has already been started or stopped. */ def start(): Unit = synchronized { + import StreamingContext._ if (state == Started) { throw new SparkException("StreamingContext has already been started") } @@ -525,10 +530,15 @@ class StreamingContext private[streaming] ( throw new SparkException("StreamingContext has already been stopped") } validate() - sparkContext.setCallSite(DStream.getCreationSite()) - scheduler.start() - uiTab.foreach(_.attach()) - state = Started + startSite.set(DStream.getCreationSite()) + sparkContext.setCallSite(startSite.get) + ACTIVATION_LOCK.synchronized { + assertNoOtherContextIsActive() + scheduler.start() + uiTab.foreach(_.attach()) + state = Started + setActiveContext(this) + } } /** @@ -603,6 +613,7 @@ class StreamingContext private[streaming] ( uiTab.foreach(_.detach()) // The state should always be Stopped after calling `stop()`, even if we haven't started yet: state = Stopped + StreamingContext.setActiveContext(null) } } @@ -612,8 +623,29 @@ class StreamingContext private[streaming] ( */ object StreamingContext extends Logging { + /** + * Lock that guards access to global variables that track active StreamingContext. + */ + private val ACTIVATION_LOCK = new Object() - private[streaming] val DEFAULT_CLEANER_TTL = 3600 + private val activeContext = new AtomicReference[StreamingContext](null) + + private def assertNoOtherContextIsActive(): Unit = { + ACTIVATION_LOCK.synchronized { + if (activeContext.get() != null) { + throw new SparkException( + "Only one StreamingContext may be started in this JVM. " + + "Currently running StreamingContext was started at" + + activeContext.get.startSite.get.longForm) + } + } + } + + private def setActiveContext(ssc: StreamingContext): Unit = { + ACTIVATION_LOCK.synchronized { + activeContext.set(ssc) + } + } @deprecated("Replaced by implicit functions in the DStream companion object. 
This is " + "kept here only for backward compatibility.", "1.3.0") diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index a589deb1fa579..11c7fd835bfcd 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -480,6 +480,24 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w } } + test("multiple streaming contexts") { + sc = new SparkContext(new SparkConf().setMaster(master).setAppName(appName)) + ssc = new StreamingContext(sc, Seconds(1)) + val input = addInputStream(ssc) + input.foreachRDD { rdd => rdd.count } + ssc.start() + + // Creating another streaming context should not create errors + val anotherSsc = new StreamingContext(sc, Seconds(10)) + val anotherInput = addInputStream(anotherSsc) + anotherInput.foreachRDD { rdd => rdd.count } + + val exception = intercept[SparkException] { + anotherSsc.start() + } + assert(exception.getMessage.contains("StreamingContext"), "Did not get the right exception") + } + test("DStream and generated RDD creation sites") { testPackage.test() } From 0a4844f90a712e796c9404b422cea76d21a5d2e3 Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Mon, 11 May 2015 11:35:16 -0700 Subject: [PATCH 081/320] [SPARK-7462] By default retain group by columns in aggregate Updated Java, Scala, Python, and R. Author: Reynold Xin <rxin@databricks.com> Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Closes #5996 from rxin/groupby-retain and squashes the following commits: aac7119 [Reynold Xin] Merge branch 'groupby-retain' of github.com:rxin/spark into groupby-retain f6858f6 [Reynold Xin] Merge branch 'master' into groupby-retain 5f923c0 [Reynold Xin] Merge pull request #15 from shivaram/sparkr-groupby-retrain c1de670 [Shivaram Venkataraman] Revert workaround in SparkR to retain grouped cols Based on reverting code added in commit https://github.com/amplab-extras/spark/commit/9a6be746efc9fafad88122fa2267862ef87aa0e1 b8b87e1 [Reynold Xin] Fixed DataFrameJoinSuite. 
d910141 [Reynold Xin] Updated rest of the files 1e6e666 [Reynold Xin] [SPARK-7462] By default retain group by columns in aggregate --- R/pkg/R/group.R | 4 +- python/pyspark/sql/dataframe.py | 2 +- .../org/apache/spark/sql/GroupedData.scala | 15 +- .../scala/org/apache/spark/sql/SQLConf.scala | 6 + .../org/apache/spark/sql/api/r/SQLUtils.scala | 11 - .../sql/execution/stat/StatFunctions.scala | 2 +- .../spark/sql/DataFrameAggregateSuite.scala | 193 ++++++++++++++++++ .../apache/spark/sql/DataFrameJoinSuite.scala | 4 +- .../org/apache/spark/sql/DataFrameSuite.scala | 151 +------------- .../scala/org/apache/spark/sql/TestData.scala | 2 - 10 files changed, 218 insertions(+), 172 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 5a7a8a2caba13..b758481997574 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -102,9 +102,7 @@ setMethod("agg", } } jcols <- lapply(cols, function(c) { c@jc }) - # the GroupedData.agg(col, cols*) API does not contain grouping Column - sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "aggWithGrouping", - x@sgd, listToSeq(jcols)) + sdf <- callJMethod(x@sgd, "agg", jcols[[1]], listToSeq(jcols[-1])) } else { stop("agg can only support Column or character") } diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index a9697999e82cb..c2fa6c87384a6 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1069,7 +1069,7 @@ def agg(self, *exprs): >>> from pyspark.sql import functions as F >>> gdf.agg(F.min(df.age)).collect() - [Row(MIN(age)=2), Row(MIN(age)=5)] + [Row(name=u'Alice', MIN(age)=2), Row(name=u'Bob', MIN(age)=5)] """ assert exprs, "exprs should not be empty" if len(exprs) == 1 and isinstance(exprs[0], dict): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 53ad67372e024..003a620dcc8ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -135,8 +135,9 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) } /** - * Compute aggregates by specifying a series of aggregate columns. Unlike other methods in this - * class, the resulting [[DataFrame]] won't automatically include the grouping columns. + * Compute aggregates by specifying a series of aggregate columns. Note that this function by + * default retains the grouping columns in its output. To not retain grouping columns, set + * `spark.sql.retainGroupColumns` to false. * * The available aggregate methods are defined in [[org.apache.spark.sql.functions]]. 
* @@ -158,7 +159,15 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) case expr: NamedExpression => expr case expr: Expression => Alias(expr, expr.prettyString)() } - DataFrame(df.sqlContext, Aggregate(groupingExprs, aggExprs, df.logicalPlan)) + if (df.sqlContext.conf.dataFrameRetainGroupColumns) { + val retainedExprs = groupingExprs.map { + case expr: NamedExpression => expr + case expr: Expression => Alias(expr, expr.prettyString)() + } + DataFrame(df.sqlContext, Aggregate(groupingExprs, retainedExprs ++ aggExprs, df.logicalPlan)) + } else { + DataFrame(df.sqlContext, Aggregate(groupingExprs, aggExprs, df.logicalPlan)) + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index 98a75bb4ed2df..dcac97beafb04 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -74,6 +74,9 @@ private[spark] object SQLConf { // See SPARK-6231. val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY = "spark.sql.selfJoinAutoResolveAmbiguity" + // Whether to retain group by columns or not in GroupedData.agg. + val DATAFRAME_RETAIN_GROUP_COLUMNS = "spark.sql.retainGroupColumns" + val USE_SQL_SERIALIZER2 = "spark.sql.useSerializer2" val USE_JACKSON_STREAMING_API = "spark.sql.json.useJacksonStreamingAPI" @@ -242,6 +245,9 @@ private[sql] class SQLConf extends Serializable with CatalystConf { private[spark] def dataFrameSelfJoinAutoResolveAmbiguity: Boolean = getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY, "true").toBoolean + + private[spark] def dataFrameRetainGroupColumns: Boolean = + getConf(DATAFRAME_RETAIN_GROUP_COLUMNS, "true").toBoolean /** ********************** SQLConf functionality methods ************ */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index ae77f72998a22..423ecdff5804a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -72,17 +72,6 @@ private[r] object SQLUtils { sqlContext.createDataFrame(rowRDD, schema) } - // A helper to include grouping columns in Agg() - def aggWithGrouping(gd: GroupedData, exprs: Column*): DataFrame = { - val aggExprs = exprs.map { col => - col.expr match { - case expr: NamedExpression => expr - case expr: Expression => Alias(expr, expr.simpleString)() - } - } - gd.toDF(aggExprs) - } - def dfToRowRDD(df: DataFrame): JavaRDD[Array[Byte]] = { df.map(r => rowToRBytes(r)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index 71b7f6c2a6756..d22f5fd2d439c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -104,7 +104,7 @@ private[sql] object StatFunctions extends Logging { /** Generate a table of frequencies for the elements of two columns. 
*/ private[sql] def crossTabulate(df: DataFrame, col1: String, col2: String): DataFrame = { val tableName = s"${col1}_$col2" - val counts = df.groupBy(col1, col2).agg(col(col1), col(col2), count("*")).take(1e6.toInt) + val counts = df.groupBy(col1, col2).agg(count("*")).take(1e6.toInt) if (counts.length == 1e6.toInt) { logWarning("The maximum limit of 1e6 pairs have been collected, which may not be all of " + "the pairs. Please try reducing the amount of distinct items in your columns.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala new file mode 100644 index 0000000000000..35a574f354741 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.TestData._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.TestSQLContext +import org.apache.spark.sql.test.TestSQLContext.implicits._ +import org.apache.spark.sql.types.DecimalType + + +class DataFrameAggregateSuite extends QueryTest { + + test("groupBy") { + checkAnswer( + testData2.groupBy("a").agg(sum($"b")), + Seq(Row(1, 3), Row(2, 3), Row(3, 3)) + ) + checkAnswer( + testData2.groupBy("a").agg(sum($"b").as("totB")).agg(sum('totB)), + Row(9) + ) + checkAnswer( + testData2.groupBy("a").agg(count("*")), + Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil + ) + checkAnswer( + testData2.groupBy("a").agg(Map("*" -> "count")), + Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil + ) + checkAnswer( + testData2.groupBy("a").agg(Map("b" -> "sum")), + Row(1, 3) :: Row(2, 3) :: Row(3, 3) :: Nil + ) + + val df1 = Seq(("a", 1, 0, "b"), ("b", 2, 4, "c"), ("a", 2, 3, "d")) + .toDF("key", "value1", "value2", "rest") + + checkAnswer( + df1.groupBy("key").min(), + df1.groupBy("key").min("value1", "value2").collect() + ) + checkAnswer( + df1.groupBy("key").min("value2"), + Seq(Row("a", 0), Row("b", 4)) + ) + } + + test("spark.sql.retainGroupColumns config") { + checkAnswer( + testData2.groupBy("a").agg(sum($"b")), + Seq(Row(1, 3), Row(2, 3), Row(3, 3)) + ) + + TestSQLContext.conf.setConf("spark.sql.retainGroupColumns", "false") + checkAnswer( + testData2.groupBy("a").agg(sum($"b")), + Seq(Row(3), Row(3), Row(3)) + ) + TestSQLContext.conf.setConf("spark.sql.retainGroupColumns", "true") + } + + test("agg without groups") { + checkAnswer( + testData2.agg(sum('b)), + Row(9) + ) + } + + test("average") { + checkAnswer( + testData2.agg(avg('a)), + Row(2.0)) + + // Also check mean + checkAnswer( + testData2.agg(mean('a)), + Row(2.0)) + + checkAnswer( + testData2.agg(avg('a), sumDistinct('a)), // non-partial + Row(2.0, 6.0) :: Nil) 
+ + checkAnswer( + decimalData.agg(avg('a)), + Row(new java.math.BigDecimal(2.0))) + checkAnswer( + decimalData.agg(avg('a), sumDistinct('a)), // non-partial + Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) + + checkAnswer( + decimalData.agg(avg('a cast DecimalType(10, 2))), + Row(new java.math.BigDecimal(2.0))) + // non-partial + checkAnswer( + decimalData.agg(avg('a cast DecimalType(10, 2)), sumDistinct('a cast DecimalType(10, 2))), + Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) + } + + test("null average") { + checkAnswer( + testData3.agg(avg('b)), + Row(2.0)) + + checkAnswer( + testData3.agg(avg('b), countDistinct('b)), + Row(2.0, 1)) + + checkAnswer( + testData3.agg(avg('b), sumDistinct('b)), // non-partial + Row(2.0, 2.0)) + } + + test("zero average") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + checkAnswer( + emptyTableData.agg(avg('a)), + Row(null)) + + checkAnswer( + emptyTableData.agg(avg('a), sumDistinct('b)), // non-partial + Row(null, null)) + } + + test("count") { + assert(testData2.count() === testData2.map(_ => 1).count()) + + checkAnswer( + testData2.agg(count('a), sumDistinct('a)), // non-partial + Row(6, 6.0)) + } + + test("null count") { + checkAnswer( + testData3.groupBy('a).agg(count('b)), + Seq(Row(1,0), Row(2, 1)) + ) + + checkAnswer( + testData3.groupBy('a).agg(count('a + 'b)), + Seq(Row(1,0), Row(2, 1)) + ) + + checkAnswer( + testData3.agg(count('a), count('b), count(lit(1)), countDistinct('a), countDistinct('b)), + Row(2, 1, 2, 2, 1) + ) + + checkAnswer( + testData3.agg(count('b), countDistinct('b), sumDistinct('b)), // non-partial + Row(1, 1, 2) + ) + } + + test("zero count") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + assert(emptyTableData.count() === 0) + + checkAnswer( + emptyTableData.agg(count('a), sumDistinct('a)), // non-partial + Row(0, null)) + } + + test("zero sum") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + checkAnswer( + emptyTableData.agg(sum('a)), + Row(null)) + } + + test("zero sum distinct") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + checkAnswer( + emptyTableData.agg(sumDistinct('a)), + Row(null)) + } + +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index f005f55b6432e..787f3f175fea2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -77,8 +77,8 @@ class DataFrameJoinSuite extends QueryTest { df.join(df, df("key") === df("key") && df("value") === 1), Row(1, "1", 1, "1") :: Nil) - val left = df.groupBy("key").agg($"key", count("*")) - val right = df.groupBy("key").agg($"key", sum("key")) + val left = df.groupBy("key").agg(count("*")) + val right = df.groupBy("key").agg(sum("key")) checkAnswer( left.join(right, left("key") === right("key")), Row(1, 1, 1, 1) :: Row(2, 1, 2, 2) :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index cf590cbd5219c..7552c1288165f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -22,7 +22,6 @@ import scala.language.postfixOps import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.sql.test.{ExamplePointUDT, ExamplePoint, 
TestSQLContext} -import org.apache.spark.sql.test.TestSQLContext.logicalPlanToSparkQuery import org.apache.spark.sql.test.TestSQLContext.implicits._ @@ -63,7 +62,7 @@ class DataFrameSuite extends QueryTest { val df = Seq((1,(1,1))).toDF() checkAnswer( - df.groupBy("_1").agg(col("_1"), sum("_2._1")).toDF("key", "total"), + df.groupBy("_1").agg(sum("_2._1")).toDF("key", "total"), Row(1, 1) :: Nil) } @@ -128,7 +127,7 @@ class DataFrameSuite extends QueryTest { df2 .select('_1 as 'letter, 'number) .groupBy('letter) - .agg('letter, countDistinct('number)), + .agg(countDistinct('number)), Row("a", 3) :: Row("b", 2) :: Row("c", 1) :: Nil ) } @@ -165,48 +164,6 @@ class DataFrameSuite extends QueryTest { testData.select('key).collect().toSeq) } - test("groupBy") { - checkAnswer( - testData2.groupBy("a").agg($"a", sum($"b")), - Seq(Row(1, 3), Row(2, 3), Row(3, 3)) - ) - checkAnswer( - testData2.groupBy("a").agg($"a", sum($"b").as("totB")).agg(sum('totB)), - Row(9) - ) - checkAnswer( - testData2.groupBy("a").agg(col("a"), count("*")), - Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil - ) - checkAnswer( - testData2.groupBy("a").agg(Map("*" -> "count")), - Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil - ) - checkAnswer( - testData2.groupBy("a").agg(Map("b" -> "sum")), - Row(1, 3) :: Row(2, 3) :: Row(3, 3) :: Nil - ) - - val df1 = Seq(("a", 1, 0, "b"), ("b", 2, 4, "c"), ("a", 2, 3, "d")) - .toDF("key", "value1", "value2", "rest") - - checkAnswer( - df1.groupBy("key").min(), - df1.groupBy("key").min("value1", "value2").collect() - ) - checkAnswer( - df1.groupBy("key").min("value2"), - Seq(Row("a", 0), Row("b", 4)) - ) - } - - test("agg without groups") { - checkAnswer( - testData2.agg(sum('b)), - Row(9) - ) - } - test("convert $\"attribute name\" into unresolved attribute") { checkAnswer( testData.where($"key" === lit(1)).select($"value"), @@ -303,110 +260,6 @@ class DataFrameSuite extends QueryTest { mapData.take(1).map(r => Row.fromSeq(r.productIterator.toSeq))) } - test("average") { - checkAnswer( - testData2.agg(avg('a)), - Row(2.0)) - - // Also check mean - checkAnswer( - testData2.agg(mean('a)), - Row(2.0)) - - checkAnswer( - testData2.agg(avg('a), sumDistinct('a)), // non-partial - Row(2.0, 6.0) :: Nil) - - checkAnswer( - decimalData.agg(avg('a)), - Row(new java.math.BigDecimal(2.0))) - checkAnswer( - decimalData.agg(avg('a), sumDistinct('a)), // non-partial - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) - - checkAnswer( - decimalData.agg(avg('a cast DecimalType(10, 2))), - Row(new java.math.BigDecimal(2.0))) - // non-partial - checkAnswer( - decimalData.agg(avg('a cast DecimalType(10, 2)), sumDistinct('a cast DecimalType(10, 2))), - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) - } - - test("null average") { - checkAnswer( - testData3.agg(avg('b)), - Row(2.0)) - - checkAnswer( - testData3.agg(avg('b), countDistinct('b)), - Row(2.0, 1)) - - checkAnswer( - testData3.agg(avg('b), sumDistinct('b)), // non-partial - Row(2.0, 2.0)) - } - - test("zero average") { - checkAnswer( - emptyTableData.agg(avg('a)), - Row(null)) - - checkAnswer( - emptyTableData.agg(avg('a), sumDistinct('b)), // non-partial - Row(null, null)) - } - - test("count") { - assert(testData2.count() === testData2.map(_ => 1).count()) - - checkAnswer( - testData2.agg(count('a), sumDistinct('a)), // non-partial - Row(6, 6.0)) - } - - test("null count") { - checkAnswer( - testData3.groupBy('a).agg('a, count('b)), - Seq(Row(1,0), Row(2, 1)) - ) - - checkAnswer( - testData3.groupBy('a).agg('a, 
count('a + 'b)), - Seq(Row(1,0), Row(2, 1)) - ) - - checkAnswer( - testData3.agg(count('a), count('b), count(lit(1)), countDistinct('a), countDistinct('b)), - Row(2, 1, 2, 2, 1) - ) - - checkAnswer( - testData3.agg(count('b), countDistinct('b), sumDistinct('b)), // non-partial - Row(1, 1, 2) - ) - } - - test("zero count") { - assert(emptyTableData.count() === 0) - - checkAnswer( - emptyTableData.agg(count('a), sumDistinct('a)), // non-partial - Row(0, null)) - } - - test("zero sum") { - checkAnswer( - emptyTableData.agg(sum('a)), - Row(null)) - } - - test("zero sum distinct") { - checkAnswer( - emptyTableData.agg(sumDistinct('a)), - Row(null)) - } - test("except") { checkAnswer( lowerCaseData.except(upperCaseData), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala index 225b51bd73d6c..446771ab2a5a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala @@ -86,8 +86,6 @@ object TestData { TestData3(2, Some(2)) :: Nil).toDF() testData3.registerTempTable("testData3") - val emptyTableData = logical.LocalRelation($"a".int, $"b".int) - case class UpperCaseData(N: Int, L: String) val upperCaseData = TestSQLContext.sparkContext.parallelize( From 82fee9d9aad2c9ba2fb4bd658579fe99218cafac Mon Sep 17 00:00:00 2001 From: Sandy Ryza <sandy@cloudera.com> Date: Mon, 11 May 2015 12:09:39 -0700 Subject: [PATCH 082/320] [SPARK-6470] [YARN] Add support for YARN node labels. This is difficult to write a test for because it relies on the latest version of YARN, but I verified manually that the patch does pass along the label expression on this version and containers are successfully launched. Author: Sandy Ryza <sandy@cloudera.com> Closes #5242 from sryza/sandy-spark-6470 and squashes the following commits: 6af87b9 [Sandy Ryza] Change info to warning 6e22d99 [Sandy Ryza] [YARN] SPARK-6470. Add support for YARN node labels. --- docs/running-on-yarn.md | 9 ++++++ .../spark/deploy/yarn/YarnAllocator.scala | 31 ++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 4fb4a90307ec8..51c1339165024 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -220,6 +220,15 @@ Most of the configs are the same for Spark on YARN as for other deployment modes Otherwise, the client process will exit after submission. </td> </tr> +<tr> + <td><code>spark.yarn.executor.nodeLabelExpression</code></td> + <td>(none)</td> + <td> + A YARN node label expression that restricts the set of nodes executors will be scheduled on. + Only versions of YARN greater than or equal to 2.6 support node label expressions, so when + running against earlier versions, this property will be ignored. 
+ </td> +</tr> </table> # Launching Spark on YARN diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 88d68d5556162..8a08f561a2df2 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -117,6 +117,24 @@ private[yarn] class YarnAllocator( // For testing private val launchContainers = sparkConf.getBoolean("spark.yarn.launchContainers", true) + private val labelExpression = sparkConf.getOption("spark.yarn.executor.nodeLabelExpression") + + // ContainerRequest constructor that can take a node label expression. We grab it through + // reflection because it's only available in later versions of YARN. + private val nodeLabelConstructor = labelExpression.flatMap { expr => + try { + Some(classOf[ContainerRequest].getConstructor(classOf[Resource], + classOf[Array[String]], classOf[Array[String]], classOf[Priority], classOf[Boolean], + classOf[String])) + } catch { + case e: NoSuchMethodException => { + logWarning(s"Node label expression $expr will be ignored because YARN version on" + + " classpath does not support it.") + None + } + } + } + def getNumExecutorsRunning: Int = numExecutorsRunning def getNumExecutorsFailed: Int = numExecutorsFailed @@ -211,7 +229,7 @@ private[yarn] class YarnAllocator( s"cores and ${resource.getMemory} MB memory including $memoryOverhead MB overhead") for (i <- 0 until missing) { - val request = new ContainerRequest(resource, null, null, RM_REQUEST_PRIORITY) + val request = createContainerRequest(resource) amClient.addContainerRequest(request) val nodes = request.getNodes val hostStr = if (nodes == null || nodes.isEmpty) "Any" else nodes.last @@ -230,6 +248,17 @@ private[yarn] class YarnAllocator( } } + /** + * Creates a container request, handling the reflection required to use YARN features that were + * added in recent versions. + */ + private def createContainerRequest(resource: Resource): ContainerRequest = { + nodeLabelConstructor.map { constructor => + constructor.newInstance(resource, null, null, RM_REQUEST_PRIORITY, true: java.lang.Boolean, + labelExpression.orNull) + }.getOrElse(new ContainerRequest(resource, null, null, RM_REQUEST_PRIORITY)) + } + /** * Handle containers granted by the RM by launching executors on them. * From 7ce2a33c3acffa17ae32f48ebb40b69b9b36dae4 Mon Sep 17 00:00:00 2001 From: Steve Loughran <stevel@hortonworks.com> Date: Mon, 11 May 2015 13:35:06 -0700 Subject: [PATCH 083/320] [SPARK-7508] JettyUtils-generated servlets to log & report all errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch for SPARK-7508 This logs warn then generates a response which include the message body and stack trace as text/plain, no-cache. The exit code is 500. In practise (in some tests in SPARK-1537 to be precise), jetty is getting in between this servlet and the web response the user sees —the body of the response is lost for any error response (500, even 404 and bad request). The standard Jetty handlers must be getting in the way. This patch doesn't address that, it ensures that 1. if the jetty handlers were put to one side the users would see the errors 2. at least the exceptions appear in the server-side logs. This is better to users saying "I saw a 500 error" and you not having anything in the logs to see what went wrong. 
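To illustrate the node label support added by SPARK-6470 above, here is a minimal sketch (not part of the patch) of an application that asks YARN to place its executors only on labeled nodes. The label name "gpu" and the application name are hypothetical; on YARN versions older than 2.6 the setting is ignored with the warning that YarnAllocator logs.

import org.apache.spark.{SparkConf, SparkContext}

object NodeLabelExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("node-label-example")
      // Hypothetical label: ask YARN (2.6+) to schedule executors only on nodes labeled "gpu".
      // On earlier YARN versions this property is logged as a warning and ignored.
      .set("spark.yarn.executor.nodeLabelExpression", "gpu")
    val sc = new SparkContext(conf)
    // Any ordinary job; the label only changes where YARN places the executor containers.
    println(sc.parallelize(1 to 1000).map(_ * 2).sum())
    sc.stop()
  }
}

The master is assumed to be supplied at submission time through spark-submit with a YARN master, since node label expressions only apply to the YARN deployment modes.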
Author: Steve Loughran <stevel@hortonworks.com> Closes #6033 from steveloughran/stevel/feature/SPARK-7508-JettyUtils and squashes the following commits: 584836f [Steve Loughran] SPARK-7508 drop trailing semicolon ad6f185 [Steve Loughran] SPARK-7508: jetty handles exception reporting itself; spark just sets this up and logs exceptions before being relayed 258d9f9 [Steve Loughran] SPARK-7508 fix typo manually-edited before patch pushed 69c8263 [Steve Loughran] SPARK-7508 JettyUtils-generated servlets to log & report all errors --- core/src/main/scala/org/apache/spark/ui/JettyUtils.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index dfd6fdb5e9993..06e616220c706 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -78,6 +78,9 @@ private[spark] object JettyUtils extends Logging { } catch { case e: IllegalArgumentException => response.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage) + case e: Exception => + logWarning(s"GET ${request.getRequestURI} failed: $e", e) + throw e } } // SPARK-5983 ensure TRACE is not supported @@ -217,6 +220,9 @@ private[spark] object JettyUtils extends Logging { val pool = new QueuedThreadPool pool.setDaemon(true) server.setThreadPool(pool) + val errorHandler = new ErrorHandler() + errorHandler.setShowStacks(true) + server.addBean(errorHandler) server.setHandler(collection) try { server.start() From 6e9910c21ada19ae14122b5f14a6a30845b98229 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Date: Mon, 11 May 2015 14:19:11 -0700 Subject: [PATCH 084/320] [SPARK-7515] [DOC] Update documentation for PySpark on YARN with cluster mode Now PySpark on YARN with cluster mode is supported so let's update doc. Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Closes #6040 from sarutak/update-doc-for-pyspark-on-yarn and squashes the following commits: ad9f88c [Kousuke Saruta] Brushed up sentences 469fd2e [Kousuke Saruta] Merge branch 'master' of https://github.com/apache/spark into update-doc-for-pyspark-on-yarn fcfdb92 [Kousuke Saruta] Updated doc for PySpark on YARN with cluster mode --- docs/submitting-applications.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md index 3ecbf2308cd44..e58645274e525 100644 --- a/docs/submitting-applications.md +++ b/docs/submitting-applications.md @@ -59,7 +59,7 @@ for applications that involve the REPL (e.g. Spark shell). Alternatively, if your application is submitted from a machine far from the worker machines (e.g. locally on your laptop), it is common to use `cluster` mode to minimize network latency between the drivers and the executors. Note that `cluster` mode is currently not supported for -Mesos clusters or Python applications. +Mesos clusters. Currently only YARN supports cluster mode for Python applications. For Python applications, simply pass a `.py` file in the place of `<application-jar>` instead of a JAR, and add Python `.zip`, `.egg` or `.py` files to the search path with `--py-files`. 
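As a rough illustration of the servlet pattern that SPARK-7508 above adds to JettyUtils (log the failing request URI and stack trace, then rethrow so the server's ErrorHandler can report the error to the client), here is a standalone sketch against the plain javax.servlet API. The LoggingServlet class and its render delegate are hypothetical; Spark's actual code lives in the private JettyUtils.createServlet.

import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse}
import org.slf4j.LoggerFactory

// Hypothetical wrapper, not Spark's code: runs a page-rendering function and makes sure
// failures are both logged server-side and surfaced to the client instead of a bare 500.
class LoggingServlet(render: HttpServletRequest => String) extends HttpServlet {
  private val log = LoggerFactory.getLogger(getClass)

  override def doGet(request: HttpServletRequest, response: HttpServletResponse): Unit = {
    try {
      response.setStatus(HttpServletResponse.SC_OK)
      response.getWriter.print(render(request))
    } catch {
      case e: IllegalArgumentException =>
        // Bad parameters keep their existing treatment: a 400 with the message.
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage)
      case e: Exception =>
        // The behavior the patch adds: record the failing URI and stack trace, then rethrow
        // so an ErrorHandler configured with setShowStacks(true) can render it.
        log.warn(s"GET ${request.getRequestURI} failed: $e", e)
        throw e
    }
  }
}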
From 8e674331d9ce98068b44e4d483b6d35cef0648fa Mon Sep 17 00:00:00 2001 From: gchen <chenguancheng@gmail.com> Date: Mon, 11 May 2015 14:37:18 -0700 Subject: [PATCH 085/320] [SPARK-7516] [Minor] [DOC] Replace depreciated inferSchema() with createDataFrame() JIRA: https://issues.apache.org/jira/browse/SPARK-7516 In sql-programming-guide, deprecated python data frame api inferSchema() should be replaced by createDataFrame(): schemaPeople = sqlContext.inferSchema(people) -> schemaPeople = sqlContext.createDataFrame(people) Author: gchen <chenguancheng@gmail.com> Closes #6041 from gchen/python-docs and squashes the following commits: c27eb7c [gchen] replace inferSchema() with createDataFrame() --- docs/sql-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index df4c123bdd86c..6af10432b9d03 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -470,7 +470,7 @@ parts = lines.map(lambda l: l.split(",")) people = parts.map(lambda p: Row(name=p[0], age=int(p[1]))) # Infer the schema, and register the DataFrame as a table. -schemaPeople = sqlContext.inferSchema(people) +schemaPeople = sqlContext.createDataFrame(people) schemaPeople.registerTempTable("people") # SQL can be run over DataFrames that have been registered as a table. From 25c01c54840a9ab768f8b917de7edc2bc2d61b9e Mon Sep 17 00:00:00 2001 From: jerryshao <saisai.shao@intel.com> Date: Mon, 11 May 2015 14:38:58 -0700 Subject: [PATCH 086/320] [STREAMING] [MINOR] Close files correctly when iterator is finished in streaming WAL recovery Currently there's no chance to close the file correctly after the iteration is finished, change to `CompletionIterator` to avoid resource leakage. Author: jerryshao <saisai.shao@intel.com> Closes #6050 from jerryshao/close-file-correctly and squashes the following commits: 52dfaf5 [jerryshao] Close files correctly when iterator is finished --- .../apache/spark/streaming/util/FileBasedWriteAheadLog.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala index 9985fedc35141..87ba4f84a9ceb 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala @@ -26,7 +26,7 @@ import scala.language.postfixOps import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.{CompletionIterator, ThreadUtils} import org.apache.spark.{Logging, SparkConf} /** @@ -124,7 +124,8 @@ private[streaming] class FileBasedWriteAheadLog( logFilesToRead.iterator.map { file => logDebug(s"Creating log reader with $file") - new FileBasedWriteAheadLogReader(file, hadoopConf) + val reader = new FileBasedWriteAheadLogReader(file, hadoopConf) + CompletionIterator[ByteBuffer, Iterator[ByteBuffer]](reader, reader.close _) } flatMap { x => x } } From a8ea09683acc071cd81b244e8d2b7d9638b1aced Mon Sep 17 00:00:00 2001 From: LCY Vincent <lauchunyin@gmail.com> Date: Mon, 11 May 2015 14:48:10 -0700 Subject: [PATCH 087/320] Update Documentation: leftsemi instead of semijoin should sync up with here? 
https://github.com/apache/spark/blob/119f45d61d7b48d376cca05e1b4f0c7fcf65bfa8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala#L26 Author: LCY Vincent <lauchunyin@gmail.com> Closes #5944 from vincentlaucy/master and squashes the following commits: fc0e454 [LCY Vincent] Update DataFrame.scala --- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 7947042c14299..c92ca607fbd54 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -433,7 +433,7 @@ class DataFrame private[sql]( * * @param right Right side of the join. * @param joinExprs Join expression. - * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `semijoin`. + * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. * @group dfops */ def join(right: DataFrame, joinExprs: Column, joinType: String): DataFrame = { From 91dc3dfd77126afbb894a783b7201b87b59f73cb Mon Sep 17 00:00:00 2001 From: Sean Owen <sowen@cloudera.com> Date: Tue, 12 May 2015 00:25:43 +0100 Subject: [PATCH 088/320] [MINOR] [DOCS] Fix the link to test building info on the wiki Author: Sean Owen <sowen@cloudera.com> Closes #6063 from srowen/FixRunningTestsLink and squashes the following commits: db62018 [Sean Owen] Fix the link to test building info on the wiki --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c3afc4db9c63c..9c09d40e2bdae 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ can be run using: ./dev/run-tests Please see the guidance on how to -[run all automated tests](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-AutomatedTesting). +[run tests for a module, or individual tests](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools). ## A Note About Hadoop Versions From 4f4dbb030c208caba18f314a1ef1751696627d26 Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Mon, 11 May 2015 17:02:11 -0700 Subject: [PATCH 089/320] [SQL] Show better error messages for incorrect join types in DataFrames. As a follow-up to https://github.com/apache/spark/pull/5944 Author: Reynold Xin <rxin@databricks.com> Closes #6064 from rxin/jointype-better-error and squashes the following commits: 7629bf7 [Reynold Xin] [SQL] Show better error messages for incorrect join types in DataFrames. --- .../apache/spark/sql/catalyst/plans/joinTypes.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala index 5dc0539caec24..77dec7ca6e2b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala @@ -24,6 +24,16 @@ object JoinType { case "leftouter" | "left" => LeftOuter case "rightouter" | "right" => RightOuter case "leftsemi" => LeftSemi + case _ => + val supported = Seq( + "inner", + "outer", "full", "fullouter", + "leftouter", "left", + "rightouter", "right", + "leftsemi") + + throw new IllegalArgumentException(s"Unsupported join type '$typ'. 
" + + "Supported join types include: " + supported.mkString("'", "', '", "'") + ".") } } From 57255dcd794222f4db5df1e549ebc7b896cebfdc Mon Sep 17 00:00:00 2001 From: madhukar <phatak.dev@gmail.com> Date: Mon, 11 May 2015 17:04:11 -0700 Subject: [PATCH 090/320] [SPARK-7084] improve saveAsTable documentation Author: madhukar <phatak.dev@gmail.com> Closes #5654 from phatak-dev/master and squashes the following commits: 386f407 [madhukar] #5654 updated for all the methods 2c997c5 [madhukar] Merge branch 'master' of https://github.com/apache/spark 00bc819 [madhukar] Merge branch 'master' of https://github.com/apache/spark 2a802c6 [madhukar] #5654 updated the doc according to comments 866e8df [madhukar] [SPARK-7084] improve saveAsTable documentation --- .../scala/org/apache/spark/sql/DataFrame.scala | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index c92ca607fbd54..729cfc1da25b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -1192,6 +1192,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental @@ -1208,6 +1211,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental @@ -1232,6 +1238,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental @@ -1248,6 +1257,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental @@ -1264,6 +1276,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. 
* @group output */ @Experimental @@ -1285,6 +1300,9 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive. * @group output */ @Experimental From 3a9b6997df3fef1052d8c410f32319018c52acff Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Mon, 11 May 2015 18:07:12 -0700 Subject: [PATCH 091/320] [SPARK-7462][SQL] Update documentation for retaining grouping columns in DataFrames. Author: Reynold Xin <rxin@databricks.com> Closes #6062 from rxin/agg-retain-doc and squashes the following commits: 43e511e [Reynold Xin] [SPARK-7462][SQL] Update documentation for retaining grouping columns in DataFrames. --- docs/sql-programming-guide.md | 60 ++++++++++++++++++- python/pyspark/sql/_types.py | 2 + .../org/apache/spark/sql/GroupedData.scala | 14 ++++- 3 files changed, 73 insertions(+), 3 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 6af10432b9d03..6b7b867ea6b5d 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1594,6 +1594,64 @@ options. # Migration Guide +## Upgrading from Spark SQL 1.3 to 1.4 + +Based on user feedback, we changed the default behavior of `DataFrame.groupBy().agg()` to retain the grouping columns in the resulting `DataFrame`. To keep the behavior in 1.3, set `spark.sql.retainGroupColumns` to `false`. + +<div class="codetabs"> +<div data-lang="scala" markdown="1"> +{% highlight scala %} + +// In 1.3.x, in order for the grouping column "department" to show up, +// it must be included explicitly as part of the agg function call. +df.groupBy("department").agg($"department", max("age"), sum("expense")) + +// In 1.4+, grouping column "department" is included automatically. +df.groupBy("department").agg(max("age"), sum("expense")) + +// Revert to 1.3 behavior (not retaining grouping column) by: +sqlContext.setConf("spark.sql.retainGroupColumns", "false") + +{% endhighlight %} +</div> + +<div data-lang="java" markdown="1"> +{% highlight java %} + +// In 1.3.x, in order for the grouping column "department" to show up, +// it must be included explicitly as part of the agg function call. +df.groupBy("department").agg(col("department"), max("age"), sum("expense")); + +// In 1.4+, grouping column "department" is included automatically. +df.groupBy("department").agg(max("age"), sum("expense")); + +// Revert to 1.3 behavior (not retaining grouping column) by: +sqlContext.setConf("spark.sql.retainGroupColumns", "false"); + +{% endhighlight %} +</div> + +<div data-lang="python" markdown="1"> +{% highlight python %} + +import pyspark.sql.functions as func + +# In 1.3.x, in order for the grouping column "department" to show up, +# it must be included explicitly as part of the agg function call. +df.groupBy("department").agg("department"), func.max("age"), func.sum("expense")) + +# In 1.4+, grouping column "department" is included automatically. 
+df.groupBy("department").agg(func.max("age"), func.sum("expense")) + +# Revert to 1.3.x behavior (not retaining grouping column) by: +sqlContext.setConf("spark.sql.retainGroupColumns", "false") + +{% endhighlight %} +</div> + +</div> + + ## Upgrading from Spark SQL 1.0-1.2 to 1.3 In Spark 1.3 we removed the "Alpha" label from Spark SQL and as part of this did a cleanup of the @@ -1651,7 +1709,7 @@ moved into the udf object in `SQLContext`. <div class="codetabs"> <div data-lang="scala" markdown="1"> -{% highlight java %} +{% highlight scala %} sqlContext.udf.register("strLen", (s: String) => s.length()) diff --git a/python/pyspark/sql/_types.py b/python/pyspark/sql/_types.py index fd98e116d2cf1..b96851a174d49 100644 --- a/python/pyspark/sql/_types.py +++ b/python/pyspark/sql/_types.py @@ -1228,12 +1228,14 @@ def __getattr__(self, item): raise AttributeError(item) def __reduce__(self): + """Returns a tuple so Python knows how to pickle Row.""" if hasattr(self, "__fields__"): return (_create_row, (self.__fields__, tuple(self))) else: return tuple.__reduce__(self) def __repr__(self): + """Printable representation of Row used in Python REPL.""" if hasattr(self, "__fields__"): return "Row(%s)" % ", ".join("%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 003a620dcc8ce..543320e471bf7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -146,11 +146,21 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * * // Scala: * import org.apache.spark.sql.functions._ - * df.groupBy("department").agg($"department", max($"age"), sum($"expense")) + * df.groupBy("department").agg(max("age"), sum("expense")) * * // Java: * import static org.apache.spark.sql.functions.*; - * df.groupBy("department").agg(col("department"), max(col("age")), sum(col("expense"))); + * df.groupBy("department").agg(max("age"), sum("expense")); + * }}} + * + * Note that before Spark 1.4, the default behavior is to NOT retain grouping columns. To change + * to that behavior, set config variable `spark.sql.retainGroupColumns` to `false`. + * {{{ + * // Scala, 1.3.x: + * df.groupBy("department").agg($"department", max("age"), sum("expense")) + * + * // Java, 1.3.x: + * df.groupBy("department").agg(col("department"), max("age"), sum("expense")); * }}} */ @scala.annotation.varargs From 87229c95c6b597f5b84e36d518b9830e3ba63424 Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Mon, 11 May 2015 18:10:45 -0700 Subject: [PATCH 092/320] Updated DataFrame.saveAsTable Hive warning to include SPARK-7550 ticket. So users that are interested in this can track it easily. Author: Reynold Xin <rxin@databricks.com> Closes #6067 from rxin/SPARK-7550 and squashes the following commits: ee0e34c [Reynold Xin] Updated DataFrame.saveAsTable Hive warning to include SPARK-7550 ticket. 
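To make the saveAsTable note above concrete, here is a minimal sketch (not taken from the patch) that writes a DataFrame as a managed table. The table name and data are made up; the point is that Spark SQL can read the table back through the metastore, while Hive itself cannot query it until SPARK-7550 is resolved.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.hive.HiveContext

object SaveAsTableExample {
  def main(args: Array[String]): Unit = {
    // Master is expected from spark-submit; add .setMaster("local[2]") to run locally.
    val sc = new SparkContext(new SparkConf().setAppName("saveAsTable-example"))
    val hiveContext = new HiveContext(sc)
    import hiveContext.implicits._

    val df = sc.parallelize(Seq(("a", 1), ("b", 2))).toDF("key", "value")

    // The table's metadata lands in the Hive metastore, but the data is written in
    // Spark SQL's own format, so Hive cannot read the table back (see SPARK-7550).
    df.saveAsTable("spark_managed_table", SaveMode.Overwrite)

    // Spark SQL itself resolves the table through the metastore without trouble.
    hiveContext.table("spark_managed_table").show()

    sc.stop()
  }
}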
--- .../main/scala/org/apache/spark/sql/DataFrame.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 729cfc1da25b2..2472999de37f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -1194,7 +1194,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1213,7 +1213,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1240,7 +1240,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1259,7 +1259,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1278,7 +1278,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental @@ -1302,7 +1302,7 @@ class DataFrame private[sql]( * be the target of an `insertInto`. * * Also note that while this function can persist the table metadata into Hive's metastore, - * the table will NOT be accessible from Hive. + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output */ @Experimental From 35fb42a0b01d3043b7d5e27256d1b45a08583aab Mon Sep 17 00:00:00 2001 From: Xusen Yin <yinxusen@gmail.com> Date: Mon, 11 May 2015 18:41:22 -0700 Subject: [PATCH 093/320] [SPARK-5893] [ML] Add bucketizer JIRA issue [here](https://issues.apache.org/jira/browse/SPARK-5893). One thing to make clear, the `buckets` parameter, which is an array of `Double`, performs as split points. Say, ```scala buckets = Array(-0.5, 0.0, 0.5) ``` splits the real number into 4 ranges, (-inf, -0.5], (-0.5, 0.0], (0.0, 0.5], (0.5, +inf), which is encoded as 0, 1, 2, 3. Author: Xusen Yin <yinxusen@gmail.com> Author: Joseph K. Bradley <joseph@databricks.com> Closes #5980 from yinxusen/SPARK-5893 and squashes the following commits: dc8c843 [Xusen Yin] Merge pull request #4 from jkbradley/yinxusen-SPARK-5893 1ca973a [Joseph K. Bradley] one more bucketizer test 34f124a [Joseph K. Bradley] Removed lowerInclusive, upperInclusive params from Bucketizer, and used splits instead. 
eacfcfa [Xusen Yin] change ML attribute from splits into buckets c3cc770 [Xusen Yin] add more unit test for binary search 3a16cc2 [Xusen Yin] refine comments and names ac77859 [Xusen Yin] fix style error fb30d79 [Xusen Yin] fix and test binary search 2466322 [Xusen Yin] refactor Bucketizer 11fb00a [Xusen Yin] change it into an Estimator 998bc87 [Xusen Yin] check buckets 4024cf1 [Xusen Yin] add test suite 5fe190e [Xusen Yin] add bucketizer --- .../apache/spark/ml/feature/Bucketizer.scala | 131 ++++++++++++++++ .../apache/spark/ml/util/SchemaUtils.scala | 11 ++ .../spark/ml/feature/BucketizerSuite.scala | 148 ++++++++++++++++++ 3 files changed, 290 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala create mode 100644 mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala new file mode 100644 index 0000000000000..7dba64bc3506f --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.ml.attribute.NominalAttribute +import org.apache.spark.ml.param._ +import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} +import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{DoubleType, StructField, StructType} + +/** + * :: AlphaComponent :: + * `Bucketizer` maps a column of continuous features to a column of feature buckets. + */ +@AlphaComponent +final class Bucketizer private[ml] (override val parent: Estimator[Bucketizer]) + extends Model[Bucketizer] with HasInputCol with HasOutputCol { + + def this() = this(null) + + /** + * Parameter for mapping continuous features into buckets. With n splits, there are n+1 buckets. + * A bucket defined by splits x,y holds values in the range [x,y). Splits should be strictly + * increasing. Values at -inf, inf must be explicitly provided to cover all Double values; + * otherwise, values outside the splits specified will be treated as errors. + * @group param + */ + val splits: Param[Array[Double]] = new Param[Array[Double]](this, "splits", + "Split points for mapping continuous features into buckets. With n splits, there are n+1 " + + "buckets. A bucket defined by splits x,y holds values in the range [x,y). The splits " + + "should be strictly increasing. 
Values at -inf, inf must be explicitly provided to cover" + + " all Double values; otherwise, values outside the splits specified will be treated as" + + " errors.", + Bucketizer.checkSplits) + + /** @group getParam */ + def getSplits: Array[Double] = $(splits) + + /** @group setParam */ + def setSplits(value: Array[Double]): this.type = set(splits, value) + + /** @group setParam */ + def setInputCol(value: String): this.type = set(inputCol, value) + + /** @group setParam */ + def setOutputCol(value: String): this.type = set(outputCol, value) + + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema) + val bucketizer = udf { feature: Double => + Bucketizer.binarySearchForBuckets($(splits), feature) + } + val newCol = bucketizer(dataset($(inputCol))) + val newField = prepOutputField(dataset.schema) + dataset.withColumn($(outputCol), newCol.as($(outputCol), newField.metadata)) + } + + private def prepOutputField(schema: StructType): StructField = { + val buckets = $(splits).sliding(2).map(bucket => bucket.mkString(", ")).toArray + val attr = new NominalAttribute(name = Some($(outputCol)), isOrdinal = Some(true), + values = Some(buckets)) + attr.toStructField() + } + + override def transformSchema(schema: StructType): StructType = { + SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) + SchemaUtils.appendColumn(schema, prepOutputField(schema)) + } +} + +private[feature] object Bucketizer { + /** We require splits to be of length >= 3 and to be in strictly increasing order. */ + def checkSplits(splits: Array[Double]): Boolean = { + if (splits.length < 3) { + false + } else { + var i = 0 + while (i < splits.length - 1) { + if (splits(i) >= splits(i + 1)) return false + i += 1 + } + true + } + } + + /** + * Binary searching in several buckets to place each data point. + * @throws RuntimeException if a feature is < splits.head or >= splits.last + */ + def binarySearchForBuckets( + splits: Array[Double], + feature: Double): Double = { + // Check bounds. We make an exception for +inf so that it can exist in some bin. + if ((feature < splits.head) || (feature >= splits.last && feature != Double.PositiveInfinity)) { + throw new RuntimeException(s"Feature value $feature out of Bucketizer bounds" + + s" [${splits.head}, ${splits.last}). Check your features, or loosen " + + s"the lower/upper bound constraints.") + } + var left = 0 + var right = splits.length - 2 + while (left < right) { + val mid = (left + right) / 2 + val split = splits(mid + 1) + if (feature < split) { + right = mid + } else { + left = mid + 1 + } + } + left + } +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala index 0383bf0b382b7..11592b77eb356 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala @@ -58,4 +58,15 @@ object SchemaUtils { val outputFields = schema.fields :+ StructField(colName, dataType, nullable = false) StructType(outputFields) } + + /** + * Appends a new column to the input schema. This fails if the given output column already exists. 
+ * @param schema input schema + * @param col New column schema + * @return new schema with the input column appended + */ + def appendColumn(schema: StructType, col: StructField): StructType = { + require(!schema.fieldNames.contains(col.name), s"Column ${col.name} already exists.") + StructType(schema.fields :+ col) + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala new file mode 100644 index 0000000000000..acb46c0a35709 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import scala.util.Random + +import org.scalatest.FunSuite + +import org.apache.spark.SparkException +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.sql.{DataFrame, Row, SQLContext} + +class BucketizerSuite extends FunSuite with MLlibTestSparkContext { + + @transient private var sqlContext: SQLContext = _ + + override def beforeAll(): Unit = { + super.beforeAll() + sqlContext = new SQLContext(sc) + } + + test("Bucket continuous features, without -inf,inf") { + // Check a set of valid feature values. + val splits = Array(-0.5, 0.0, 0.5) + val validData = Array(-0.5, -0.3, 0.0, 0.2) + val expectedBuckets = Array(0.0, 0.0, 1.0, 1.0) + val dataFrame: DataFrame = + sqlContext.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected") + + val bucketizer: Bucketizer = new Bucketizer() + .setInputCol("feature") + .setOutputCol("result") + .setSplits(splits) + + bucketizer.transform(dataFrame).select("result", "expected").collect().foreach { + case Row(x: Double, y: Double) => + assert(x === y, + s"The feature value is not correct after bucketing. Expected $y but found $x") + } + + // Check for exceptions when using a set of invalid feature values. 
+ val invalidData1: Array[Double] = Array(-0.9) ++ validData + val invalidData2 = Array(0.5) ++ validData + val badDF1 = sqlContext.createDataFrame(invalidData1.zipWithIndex).toDF("feature", "idx") + intercept[RuntimeException]{ + bucketizer.transform(badDF1).collect() + println("Invalid feature value -0.9 was not caught as an invalid feature!") + } + val badDF2 = sqlContext.createDataFrame(invalidData2.zipWithIndex).toDF("feature", "idx") + intercept[RuntimeException]{ + bucketizer.transform(badDF2).collect() + println("Invalid feature value 0.5 was not caught as an invalid feature!") + } + } + + test("Bucket continuous features, with -inf,inf") { + val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity) + val validData = Array(-0.9, -0.5, -0.3, 0.0, 0.2, 0.5, 0.9) + val expectedBuckets = Array(0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0) + val dataFrame: DataFrame = + sqlContext.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected") + + val bucketizer: Bucketizer = new Bucketizer() + .setInputCol("feature") + .setOutputCol("result") + .setSplits(splits) + + bucketizer.transform(dataFrame).select("result", "expected").collect().foreach { + case Row(x: Double, y: Double) => + assert(x === y, + s"The feature value is not correct after bucketing. Expected $y but found $x") + } + } + + test("Binary search correctness on hand-picked examples") { + import BucketizerSuite.checkBinarySearch + // length 3, with -inf + checkBinarySearch(Array(Double.NegativeInfinity, 0.0, 1.0)) + // length 4 + checkBinarySearch(Array(-1.0, -0.5, 0.0, 1.0)) + // length 5 + checkBinarySearch(Array(-1.0, -0.5, 0.0, 1.0, 1.5)) + // length 3, with inf + checkBinarySearch(Array(0.0, 1.0, Double.PositiveInfinity)) + // length 3, with -inf and inf + checkBinarySearch(Array(Double.NegativeInfinity, 1.0, Double.PositiveInfinity)) + // length 4, with -inf and inf + checkBinarySearch(Array(Double.NegativeInfinity, 0.0, 1.0, Double.PositiveInfinity)) + } + + test("Binary search correctness in contrast with linear search, on random data") { + val data = Array.fill(100)(Random.nextDouble()) + val splits: Array[Double] = Double.NegativeInfinity +: + Array.fill(10)(Random.nextDouble()).sorted :+ Double.PositiveInfinity + val bsResult = Vectors.dense(data.map(x => Bucketizer.binarySearchForBuckets(splits, x))) + val lsResult = Vectors.dense(data.map(x => BucketizerSuite.linearSearchForBuckets(splits, x))) + assert(bsResult ~== lsResult absTol 1e-5) + } +} + +private object BucketizerSuite extends FunSuite { + /** Brute force search for buckets. Bucket i is defined by the range [split(i), split(i+1)). */ + def linearSearchForBuckets(splits: Array[Double], feature: Double): Double = { + require(feature >= splits.head) + var i = 0 + while (i < splits.length - 1) { + if (feature < splits(i + 1)) return i + i += 1 + } + throw new RuntimeException( + s"linearSearchForBuckets failed to find bucket for feature value $feature") + } + + /** Check all values in splits, plus values between all splits. */ + def checkBinarySearch(splits: Array[Double]): Unit = { + def testFeature(feature: Double, expectedBucket: Double): Unit = { + assert(Bucketizer.binarySearchForBuckets(splits, feature) === expectedBucket, + s"Expected feature value $feature to be in bucket $expectedBucket with splits:" + + s" ${splits.mkString(", ")}") + } + var i = 0 + while (i < splits.length - 1) { + testFeature(splits(i), i) // Split i should fall in bucket i. 
+ testFeature((splits(i) + splits(i + 1)) / 2, i) // Value between splits i,i+1 should be in i. + i += 1 + } + if (splits.last === Double.PositiveInfinity) { + testFeature(Double.PositiveInfinity, splits.length - 2) + } + } +} From f9c7580adadce75a94bd2854cf4f743d8cbd1d23 Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Mon, 11 May 2015 18:53:50 -0700 Subject: [PATCH 094/320] [SPARK-7530] [STREAMING] Added StreamingContext.getState() to expose the current state of the context Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6058 from tdas/SPARK-7530 and squashes the following commits: 80ee0e6 [Tathagata Das] STARTED --> ACTIVE 3da6547 [Tathagata Das] Added synchronized dd88444 [Tathagata Das] Added more docs e1a8505 [Tathagata Das] Fixed comment length 89f9980 [Tathagata Das] Change to Java enum and added Java test 7c57351 [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7530 dd4e702 [Tathagata Das] Addressed comments. 3d56106 [Tathagata Das] Added Mima excludes 2b86ba1 [Tathagata Das] Added scala docs. 1722433 [Tathagata Das] Fixed style 976b094 [Tathagata Das] Added license 0585130 [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7530 e0f0a05 [Tathagata Das] Added getState and exposed StreamingContextState --- project/MimaExcludes.scala | 4 + .../spark/streaming/StreamingContext.scala | 75 +++++++++++-------- .../streaming/StreamingContextState.java | 45 +++++++++++ .../api/java/JavaStreamingContext.scala | 22 ++++++ .../apache/spark/streaming/JavaAPISuite.java | 14 ++++ .../spark/streaming/JavaTestUtils.scala | 1 + .../streaming/StreamingContextSuite.scala | 19 ++++- 7 files changed, 147 insertions(+), 33 deletions(-) create mode 100644 streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index cfe387faec14b..ad3d8426bdedd 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -106,6 +106,10 @@ object MimaExcludes { "org.apache.spark.sql.parquet.ParquetTestData$"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.TestGroupWriteSupport") + ) ++ Seq( + // SPARK-7530 Added StreamingContext.getState() + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.streaming.StreamingContext.state_=") ) case v if v.startsWith("1.3") => diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 5abe1367752d9..2c5834defa8c8 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -32,10 +32,11 @@ import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.spark._ -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.input.FixedLengthBinaryInputFormat import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.StreamingContextState._ import org.apache.spark.streaming.dstream._ import org.apache.spark.streaming.receiver.{ActorReceiver, ActorSupervisorStrategy, Receiver} import org.apache.spark.streaming.scheduler.{JobScheduler, StreamingListener} @@ -195,14 +196,7 @@ class StreamingContext private[streaming] ( 
assert(env.metricsSystem != null) env.metricsSystem.registerSource(streamingSource) - /** Enumeration to identify current state of the StreamingContext */ - private[streaming] object StreamingContextState extends Enumeration { - type CheckpointState = Value - val Initialized, Started, Stopped = Value - } - - import StreamingContextState._ - private[streaming] var state = Initialized + private var state: StreamingContextState = INITIALIZED private val startSite = new AtomicReference[CallSite](null) @@ -516,6 +510,21 @@ class StreamingContext private[streaming] ( ) } + /** + * :: DeveloperApi :: + * + * Return the current state of the context. The context can be in three possible states - + * - StreamingContextState.INTIALIZED - The context has been created, but not been started yet. + * Input DStreams, transformations and output operations can be created on the context. + * - StreamingContextState.ACTIVE - The context has been started, and been not stopped. + * Input DStreams, transformations and output operations cannot be created on the context. + * - StreamingContextState.STOPPED - The context has been stopped and cannot be used any more. + */ + @DeveloperApi + def getState(): StreamingContextState = synchronized { + state + } + /** * Start the execution of the streams. * @@ -523,11 +532,13 @@ class StreamingContext private[streaming] ( */ def start(): Unit = synchronized { import StreamingContext._ - if (state == Started) { - throw new SparkException("StreamingContext has already been started") - } - if (state == Stopped) { - throw new SparkException("StreamingContext has already been stopped") + state match { + case INITIALIZED => + // good to start + case ACTIVE => + throw new SparkException("StreamingContext has already been started") + case STOPPED => + throw new SparkException("StreamingContext has already been stopped") } validate() startSite.set(DStream.getCreationSite()) @@ -536,7 +547,7 @@ class StreamingContext private[streaming] ( assertNoOtherContextIsActive() scheduler.start() uiTab.foreach(_.attach()) - state = Started + state = StreamingContextState.ACTIVE setActiveContext(this) } } @@ -598,22 +609,26 @@ class StreamingContext private[streaming] ( * received data to be completed */ def stop(stopSparkContext: Boolean, stopGracefully: Boolean): Unit = synchronized { - state match { - case Initialized => logWarning("StreamingContext has not been started yet") - case Stopped => logWarning("StreamingContext has already been stopped") - case Started => - scheduler.stop(stopGracefully) - logInfo("StreamingContext stopped successfully") - waiter.notifyStop() + try { + state match { + case INITIALIZED => + logWarning("StreamingContext has not been started yet") + case STOPPED => + logWarning("StreamingContext has already been stopped") + case ACTIVE => + scheduler.stop(stopGracefully) + uiTab.foreach(_.detach()) + StreamingContext.setActiveContext(null) + waiter.notifyStop() + logInfo("StreamingContext stopped successfully") + } + // Even if we have already stopped, we still need to attempt to stop the SparkContext because + // a user might stop(stopSparkContext = false) and then call stop(stopSparkContext = true). + if (stopSparkContext) sc.stop() + } finally { + // The state should always be Stopped after calling `stop()`, even if we haven't started yet + state = STOPPED } - // Even if the streaming context has not been started, we still need to stop the SparkContext. 
- // Even if we have already stopped, we still need to attempt to stop the SparkContext because - // a user might stop(stopSparkContext = false) and then call stop(stopSparkContext = true). - if (stopSparkContext) sc.stop() - uiTab.foreach(_.detach()) - // The state should always be Stopped after calling `stop()`, even if we haven't started yet: - state = Stopped - StreamingContext.setActiveContext(null) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java new file mode 100644 index 0000000000000..d7b639383ee34 --- /dev/null +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming; + +import org.apache.spark.annotation.DeveloperApi; + +/** + * :: DeveloperApi :: + * + * Represents the state of a StreamingContext. + */ +@DeveloperApi +public enum StreamingContextState { + /** + * The context has been created, but not been started yet. + * Input DStreams, transformations and output operations can be created on the context. + */ + INITIALIZED, + + /** + * The context has been started, and been not stopped. + * Input DStreams, transformations and output operations cannot be created on the context. + */ + ACTIVE, + + /** + * The context has been stopped and cannot be used any more. + */ + STOPPED +} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index 572d7d8e8753d..d8fbed2c50644 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -578,6 +578,28 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { ssc.addStreamingListener(streamingListener) } + /** + * :: DeveloperApi :: + * + * Return the current state of the context. The context can be in three possible states - + * <ul> + * <li> + * StreamingContextState.INTIALIZED - The context has been created, but not been started yet. + * Input DStreams, transformations and output operations can be created on the context. + * </li> + * <li> + * StreamingContextState.ACTIVE - The context has been started, and been not stopped. + * Input DStreams, transformations and output operations cannot be created on the context. + * </li> + * <li> + * StreamingContextState.STOPPED - The context has been stopped and cannot be used any more. 
+ * </li> + * </ul> + */ + def getState(): StreamingContextState = { + ssc.getState() + } + /** * Start the execution of the streams. */ diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java index b1adf881dd0f5..2e00b980b9e44 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java @@ -70,6 +70,20 @@ public void testInitialization() { Assert.assertNotNull(ssc.sparkContext()); } + @SuppressWarnings("unchecked") + @Test + public void testContextState() { + List<List<Integer>> inputData = Arrays.asList(Arrays.asList(1, 2, 3, 4)); + Assert.assertTrue(ssc.getState() == StreamingContextState.INITIALIZED); + JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaTestUtils.attachTestOutputStream(stream); + Assert.assertTrue(ssc.getState() == StreamingContextState.INITIALIZED); + ssc.start(); + Assert.assertTrue(ssc.getState() == StreamingContextState.ACTIVE); + ssc.stop(); + Assert.assertTrue(ssc.getState() == StreamingContextState.STOPPED); + } + @SuppressWarnings("unchecked") @Test public void testCount() { diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala index c0ea0491c313d..bb80bff6dc2e6 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala @@ -70,6 +70,7 @@ trait JavaTestBase extends TestSuiteBase { ssc: JavaStreamingContext, numBatches: Int, numExpectedOutput: Int): JList[JList[V]] = { implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]] + ssc.getState() val res = runStreams[V](ssc.ssc, numBatches, numExpectedOutput) val out = new ArrayList[JList[V]]() res.map(entry => out.append(new ArrayList[V](entry))) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 11c7fd835bfcd..b8247db7e8950 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -109,15 +109,21 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w assert(ssc.conf.getTimeAsSeconds("spark.cleaner.ttl", "-1") === 10) } + test("state matching") { + import StreamingContextState._ + assert(INITIALIZED === INITIALIZED) + assert(INITIALIZED != ACTIVE) + } + test("start and stop state check") { ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() - assert(ssc.state === ssc.StreamingContextState.Initialized) + assert(ssc.getState() === StreamingContextState.INITIALIZED) ssc.start() - assert(ssc.state === ssc.StreamingContextState.Started) + assert(ssc.getState() === StreamingContextState.ACTIVE) ssc.stop() - assert(ssc.state === ssc.StreamingContextState.Stopped) + assert(ssc.getState() === StreamingContextState.STOPPED) // Make sure that the SparkContext is also stopped by default intercept[Exception] { @@ -129,9 +135,11 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() ssc.start() + assert(ssc.getState() === 
StreamingContextState.ACTIVE) intercept[SparkException] { ssc.start() } + assert(ssc.getState() === StreamingContextState.ACTIVE) } test("stop multiple times") { @@ -139,13 +147,16 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w addInputStream(ssc).register() ssc.start() ssc.stop() + assert(ssc.getState() === StreamingContextState.STOPPED) ssc.stop() + assert(ssc.getState() === StreamingContextState.STOPPED) } test("stop before start") { ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() ssc.stop() // stop before start should not throw exception + assert(ssc.getState() === StreamingContextState.STOPPED) } test("start after stop") { @@ -156,6 +167,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w intercept[SparkException] { ssc.start() // start after stop should throw exception } + assert(ssc.getState() === StreamingContextState.STOPPED) } test("stop only streaming context") { @@ -167,6 +179,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w addInputStream(ssc).register() ssc.start() ssc.stop(stopSparkContext = false) + assert(ssc.getState() === StreamingContextState.STOPPED) assert(sc.makeRDD(1 to 100).collect().size === 100) sc.stop() From b6bf4f76c78abfaafa99b3c3c08b498aa9644346 Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Mon, 11 May 2015 19:15:14 -0700 Subject: [PATCH 095/320] [SPARK-7324] [SQL] DataFrame.dropDuplicates This should also close https://github.com/apache/spark/pull/5870 Author: Reynold Xin <rxin@databricks.com> Closes #6066 from rxin/dropDups and squashes the following commits: 130692f [Reynold Xin] [SPARK-7324][SQL] DataFrame.dropDuplicates --- python/pyspark/sql/dataframe.py | 36 +++++++++++++++++- .../org/apache/spark/sql/DataFrame.scala | 38 ++++++++++++++++++- .../org/apache/spark/sql/DataFrameSuite.scala | 35 +++++++++++++++++ 3 files changed, 105 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index c2fa6c87384a6..4eaa8d9c572ff 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -755,8 +755,6 @@ def groupBy(self, *cols): jdf = self._jdf.groupBy(self._jcols(*cols)) return GroupedData(jdf, self.sql_ctx) - groupby = groupBy - def agg(self, *exprs): """ Aggregate on the entire :class:`DataFrame` without groups (shorthand for ``df.groupBy.agg()``). @@ -793,6 +791,36 @@ def subtract(self, other): """ return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sql_ctx) + def dropDuplicates(self, subset=None): + """Return a new :class:`DataFrame` with duplicate rows removed, + optionally only considering certain columns. + + >>> from pyspark.sql import Row + >>> df = sc.parallelize([ \ + Row(name='Alice', age=5, height=80), \ + Row(name='Alice', age=5, height=80), \ + Row(name='Alice', age=10, height=80)]).toDF() + >>> df.dropDuplicates().show() + +---+------+-----+ + |age|height| name| + +---+------+-----+ + | 5| 80|Alice| + | 10| 80|Alice| + +---+------+-----+ + + >>> df.dropDuplicates(['name', 'height']).show() + +---+------+-----+ + |age|height| name| + +---+------+-----+ + | 5| 80|Alice| + +---+------+-----+ + """ + if subset is None: + jdf = self._jdf.dropDuplicates() + else: + jdf = self._jdf.dropDuplicates(self._jseq(subset)) + return DataFrame(jdf, self.sql_ctx) + def dropna(self, how='any', thresh=None, subset=None): """Returns a new :class:`DataFrame` omitting rows with null values. 
@@ -1012,6 +1040,10 @@ def toPandas(self): import pandas as pd return pd.DataFrame.from_records(self.collect(), columns=self.columns) + # Pandas compatibility + groupby = groupBy + drop_duplicates = dropDuplicates + # Having SchemaRDD for backward compatibility (for docs) class SchemaRDD(DataFrame): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 2472999de37f5..265a61592b943 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql import java.io.CharArrayWriter import java.sql.DriverManager - import scala.collection.JavaConversions._ import scala.language.implicitConversions import scala.reflect.ClassTag @@ -42,7 +41,7 @@ import org.apache.spark.sql.catalyst.plans.{JoinType, Inner} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.{EvaluatePython, ExplainCommand, LogicalRDD} import org.apache.spark.sql.jdbc.JDBCWriteDetails -import org.apache.spark.sql.json.{JacksonGenerator, JsonRDD} +import org.apache.spark.sql.json.JacksonGenerator import org.apache.spark.sql.types._ import org.apache.spark.sql.sources.{ResolvedDataSource, CreateTableUsingAsSelect} import org.apache.spark.util.Utils @@ -932,6 +931,40 @@ class DataFrame private[sql]( } } + /** + * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. + * This is an alias for `distinct`. + * @group dfops + */ + def dropDuplicates(): DataFrame = dropDuplicates(this.columns) + + /** + * (Scala-specific) Returns a new [[DataFrame]] with duplicate rows removed, considering only + * the subset of columns. + * + * @group dfops + */ + def dropDuplicates(colNames: Seq[String]): DataFrame = { + val groupCols = colNames.map(resolve) + val groupColExprIds = groupCols.map(_.exprId) + val aggCols = logicalPlan.output.map { attr => + if (groupColExprIds.contains(attr.exprId)) { + attr + } else { + Alias(First(attr), attr.name)() + } + } + Aggregate(groupCols, aggCols, logicalPlan) + } + + /** + * Returns a new [[DataFrame]] with duplicate rows removed, considering only + * the subset of columns. + * + * @group dfops + */ + def dropDuplicates(colNames: Array[String]): DataFrame = dropDuplicates(colNames.toSeq) + /** * Computes statistics for numeric columns, including count, mean, stddev, min, and max. * If no columns are given, this function computes statistics for all numerical columns. @@ -1089,6 +1122,7 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. + * This is an alias for `dropDuplicates`. 
* @group dfops */ override def distinct: DataFrame = Distinct(logicalPlan) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 7552c1288165f..2ade955864b71 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -457,4 +457,39 @@ class DataFrameSuite extends QueryTest { assert(complexData.filter(complexData("m")("1") === 1).count() == 1) assert(complexData.filter(complexData("s")("key") === 1).count() == 1) } + + test("SPARK-7324 dropDuplicates") { + val testData = TestSQLContext.sparkContext.parallelize( + (2, 1, 2) :: (1, 1, 1) :: + (1, 2, 1) :: (2, 1, 2) :: + (2, 2, 2) :: (2, 2, 1) :: + (2, 1, 1) :: (1, 1, 2) :: + (1, 2, 2) :: (1, 2, 1) :: Nil).toDF("key", "value1", "value2") + + checkAnswer( + testData.dropDuplicates(), + Seq(Row(2, 1, 2), Row(1, 1, 1), Row(1, 2, 1), + Row(2, 2, 2), Row(2, 1, 1), Row(2, 2, 1), + Row(1, 1, 2), Row(1, 2, 2))) + + checkAnswer( + testData.dropDuplicates(Seq("key", "value1")), + Seq(Row(2, 1, 2), Row(1, 2, 1), Row(1, 1, 1), Row(2, 2, 2))) + + checkAnswer( + testData.dropDuplicates(Seq("value1", "value2")), + Seq(Row(2, 1, 2), Row(1, 2, 1), Row(1, 1, 1), Row(2, 2, 2))) + + checkAnswer( + testData.dropDuplicates(Seq("key")), + Seq(Row(2, 1, 2), Row(1, 1, 1))) + + checkAnswer( + testData.dropDuplicates(Seq("value1")), + Seq(Row(2, 1, 2), Row(1, 2, 1))) + + checkAnswer( + testData.dropDuplicates(Seq("value2")), + Seq(Row(2, 1, 2), Row(1, 1, 1))) + } } From e35d878be3b2976333618a356b88440f5e8ba408 Mon Sep 17 00:00:00 2001 From: Cheng Hao <hao.cheng@intel.com> Date: Mon, 11 May 2015 19:21:16 -0700 Subject: [PATCH 096/320] [SPARK-7411] [SQL] Support SerDe for HiveQl in CTAS This is a follow up of #5876 and should be merged after #5876. Let's wait for unit testing result from Jenkins. Author: Cheng Hao <hao.cheng@intel.com> Closes #5963 from chenghao-intel/useIsolatedClient and squashes the following commits: f87ace6 [Cheng Hao] remove the TODO and add `resolved condition` for HiveTable a8260e8 [Cheng Hao] Update code as feedback f4e243f [Cheng Hao] remove the serde setting for SequenceFile d166afa [Cheng Hao] style issue d25a4aa [Cheng Hao] Add SerDe support for CTAS --- .../spark/sql/hive/HiveMetastoreCatalog.scala | 66 +++--- .../org/apache/spark/sql/hive/HiveQl.scala | 207 +++++++++++++++--- .../spark/sql/hive/client/ClientWrapper.scala | 6 + .../hive/execution/CreateTableAsSelect.scala | 12 +- .../apache/spark/sql/hive/HiveQlSuite.scala | 177 +++++++++++++++ .../sql/hive/execution/SQLQuerySuite.scala | 4 +- 6 files changed, 390 insertions(+), 82 deletions(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index f5398605bc7eb..bbf48efb24440 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -407,64 +407,58 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive * For example, because of a CREATE TABLE X AS statement. 
*/ object CreateTables extends Rule[LogicalPlan] { - import org.apache.hadoop.hive.ql.Context - import org.apache.hadoop.hive.ql.parse.{ASTNode, QB, SemanticAnalyzer} - def apply(plan: LogicalPlan): LogicalPlan = plan transform { // Wait until children are resolved. case p: LogicalPlan if !p.childrenResolved => p + case p: LogicalPlan if p.resolved => p + case p @ CreateTableAsSelect(table, child, allowExisting) => + val schema = if (table.schema.size > 0) { + table.schema + } else { + child.output.map { + attr => new HiveColumn( + attr.name, + HiveMetastoreTypes.toMetastoreType(attr.dataType), null) + } + } + + val desc = table.copy(schema = schema) - case CreateTableAsSelect(desc, child, allowExisting) => - if (hive.convertCTAS && !desc.serde.isDefined) { + if (hive.convertCTAS && table.serde.isEmpty) { // Do the conversion when spark.sql.hive.convertCTAS is true and the query // does not specify any storage format (file format and storage handler). - if (desc.specifiedDatabase.isDefined) { + if (table.specifiedDatabase.isDefined) { throw new AnalysisException( "Cannot specify database name in a CTAS statement " + - "when spark.sql.hive.convertCTAS is set to true.") + "when spark.sql.hive.convertCTAS is set to true.") } val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTableUsingAsSelect( desc.name, - conf.defaultDataSourceName, + hive.conf.defaultDataSourceName, temporary = false, mode, options = Map.empty[String, String], child ) } else { - execution.CreateTableAsSelect( - desc.copy( - specifiedDatabase = Option(desc.specifiedDatabase.getOrElse(client.currentDatabase))), - child, - allowExisting) - } - - case p: LogicalPlan if p.resolved => p - - case p @ CreateTableAsSelect(desc, child, allowExisting) => - val (dbName, tblName) = processDatabaseAndTableName(desc.database, desc.name) - - if (hive.convertCTAS) { - if (desc.specifiedDatabase.isDefined) { - throw new AnalysisException( - "Cannot specify database name in a CTAS statement " + - "when spark.sql.hive.convertCTAS is set to true.") + val desc = if (table.serde.isEmpty) { + // add default serde + table.copy( + serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + } else { + table } - val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists - CreateTableUsingAsSelect( - tblName, - conf.defaultDataSourceName, - temporary = false, - mode, - options = Map.empty[String, String], - child - ) - } else { + val (dbName, tblName) = + processDatabaseAndTableName( + desc.specifiedDatabase.getOrElse(client.currentDatabase), desc.name) + execution.CreateTableAsSelect( - desc, + desc.copy( + specifiedDatabase = Some(dbName), + name = tblName), child, allowExisting) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 04d40bbb2bced..2cbb5ca4d2e0c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -22,14 +22,15 @@ import java.sql.Date import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.ql.Context +import org.apache.hadoop.hive.serde.serdeConstants +import org.apache.hadoop.hive.ql.{ErrorMsg, Context} import org.apache.hadoop.hive.ql.exec.{FunctionRegistry, FunctionInfo} import org.apache.hadoop.hive.ql.lib.Node -import org.apache.hadoop.hive.ql.metadata.Table import org.apache.hadoop.hive.ql.parse._ import 
org.apache.hadoop.hive.ql.plan.PlanUtils -import org.apache.spark.sql.AnalysisException +import org.apache.hadoop.hive.ql.session.SessionState +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ @@ -62,7 +63,13 @@ case class CreateTableAsSelect( allowExisting: Boolean) extends UnaryNode with Command { override def output: Seq[Attribute] = Seq.empty[Attribute] - override lazy val resolved: Boolean = tableDesc.specifiedDatabase.isDefined && childrenResolved + override lazy val resolved: Boolean = + tableDesc.specifiedDatabase.isDefined && + tableDesc.schema.size > 0 && + tableDesc.serde.isDefined && + tableDesc.inputFormat.isDefined && + tableDesc.outputFormat.isDefined && + childrenResolved } /** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */ @@ -240,12 +247,23 @@ private[hive] object HiveQl { * Otherwise, there will be Null pointer exception, * when retrieving properties form HiveConf. */ - val hContext = new Context(new HiveConf()) + val hContext = new Context(hiveConf) val node = ParseUtils.findRootNonNullToken((new ParseDriver).parse(sql, hContext)) hContext.clear() node } + /** + * Returns the HiveConf + */ + private[this] def hiveConf(): HiveConf = { + val ss = SessionState.get() // SessionState is lazy initializaion, it can be null here + if (ss == null) { + new HiveConf() + } else { + ss.getConf + } + } /** Returns a LogicalPlan for a given HiveQL string. */ def parseSql(sql: String): LogicalPlan = hqlParser.parse(sql) @@ -476,8 +494,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C DropTable(tableName, ifExists.nonEmpty) // Support "ANALYZE TABLE tableNmae COMPUTE STATISTICS noscan" case Token("TOK_ANALYZE", - Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) :: - isNoscan) => + Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) :: + isNoscan) => // Reference: // https://cwiki.apache.org/confluence/display/Hive/StatsDev#StatsDev-ExistingTables if (partitionSpec.nonEmpty) { @@ -547,6 +565,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val ( Some(tableNameParts) :: _ /* likeTable */ :: + externalTable :: Some(query) :: allowExisting +: ignores) = @@ -554,6 +573,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C Seq( "TOK_TABNAME", "TOK_LIKETABLE", + "EXTERNAL", "TOK_QUERY", "TOK_IFNOTEXISTS", "TOK_TABLECOMMENT", @@ -576,43 +596,153 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C children) val (db, tableName) = extractDbNameTableName(tableNameParts) - var tableDesc = - HiveTable( - specifiedDatabase = db, - name = tableName, - schema = Seq.empty, - partitionColumns = Seq.empty, - properties = Map.empty, - serdeProperties = Map.empty, - tableType = ManagedTable, - location = None, - inputFormat = None, - outputFormat = None, - serde = None) - - // TODO: Handle all the cases here... 
- children.foreach { - case Token("TOK_TBLRCFILE", Nil) => - import org.apache.hadoop.hive.ql.io.{RCFileInputFormat, RCFileOutputFormat} + // TODO add bucket support + var tableDesc: HiveTable = HiveTable( + specifiedDatabase = db, + name = tableName, + schema = Seq.empty[HiveColumn], + partitionColumns = Seq.empty[HiveColumn], + properties = Map[String, String](), + serdeProperties = Map[String, String](), + tableType = if (externalTable.isDefined) ExternalTable else ManagedTable, + location = None, + inputFormat = None, + outputFormat = None, + serde = None, + viewText = None) + + // default storage type abbriviation (e.g. RCFile, ORC, PARQUET etc.) + val defaultStorageType = hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT) + // handle the default format for the storage type abbriviation + tableDesc = if ("SequenceFile".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = Option("org.apache.hadoop.mapred.SequenceFileInputFormat"), + outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat")) + } else if ("RCFile".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"), + serde = Option(hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE))) + } else if ("ORC".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"), + serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + } else if ("PARQUET".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), + serde = + Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + } else { + tableDesc.copy( + inputFormat = + Option("org.apache.hadoop.mapred.TextInputFormat"), + outputFormat = + Option("org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat")) + } + + children.collect { + case list @ Token("TOK_TABCOLLIST", _) => + val cols = BaseSemanticAnalyzer.getColumns(list, true) + if (cols != null) { + tableDesc = tableDesc.copy( + schema = cols.map { field => + HiveColumn(field.getName, field.getType, field.getComment) + }) + } + case Token("TOK_TABLECOMMENT", child :: Nil) => + val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + // TODO support the sql text + tableDesc = tableDesc.copy(viewText = Option(comment)) + case Token("TOK_TABLEPARTCOLS", list @ Token("TOK_TABCOLLIST", _) :: Nil) => + val cols = BaseSemanticAnalyzer.getColumns(list(0), false) + if (cols != null) { + tableDesc = tableDesc.copy( + partitionColumns = cols.map { field => + HiveColumn(field.getName, field.getType, field.getComment) + }) + } + case Token("TOK_TABLEROWFORMAT", Token("TOK_SERDEPROPS", child :: Nil) :: Nil)=> + val serdeParams = new java.util.HashMap[String, String]() + child match { + case Token("TOK_TABLEROWFORMATFIELD", rowChild1 :: rowChild2) => + val fieldDelim = BaseSemanticAnalyzer.unescapeSQLString (rowChild1.getText()) + serdeParams.put(serdeConstants.FIELD_DELIM, fieldDelim) + serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, fieldDelim) + if (rowChild2.length > 1) { + val fieldEscape = BaseSemanticAnalyzer.unescapeSQLString (rowChild2(0).getText) + 
serdeParams.put(serdeConstants.ESCAPE_CHAR, fieldEscape) + } + case Token("TOK_TABLEROWFORMATCOLLITEMS", rowChild :: Nil) => + val collItemDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + serdeParams.put(serdeConstants.COLLECTION_DELIM, collItemDelim) + case Token("TOK_TABLEROWFORMATMAPKEYS", rowChild :: Nil) => + val mapKeyDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + serdeParams.put(serdeConstants.MAPKEY_DELIM, mapKeyDelim) + case Token("TOK_TABLEROWFORMATLINES", rowChild :: Nil) => + val lineDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + if (!(lineDelim == "\n") && !(lineDelim == "10")) { + throw new AnalysisException( + SemanticAnalyzer.generateErrorMessage( + rowChild, + ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg)) + } + serdeParams.put(serdeConstants.LINE_DELIM, lineDelim) + case Token("TOK_TABLEROWFORMATNULL", rowChild :: Nil) => + val nullFormat = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + // TODO support the nullFormat + case _ => assert(false) + } + tableDesc = tableDesc.copy( + serdeProperties = tableDesc.serdeProperties ++ serdeParams) + case Token("TOK_TABLELOCATION", child :: Nil) => + var location = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + location = EximUtil.relativeToAbsolutePath(hiveConf, location) + tableDesc = tableDesc.copy(location = Option(location)) + case Token("TOK_TABLESERIALIZER", child :: Nil) => tableDesc = tableDesc.copy( - outputFormat = Option(classOf[RCFileOutputFormat].getName), - inputFormat = Option(classOf[RCFileInputFormat[_, _]].getName)) + serde = Option(BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText))) + if (child.getChildCount == 2) { + val serdeParams = new java.util.HashMap[String, String]() + BaseSemanticAnalyzer.readProps( + (child.getChild(1).getChild(0)).asInstanceOf[ASTNode], serdeParams) + tableDesc = tableDesc.copy(serdeProperties = tableDesc.serdeProperties ++ serdeParams) + } + case Token("TOK_FILEFORMAT_GENERIC", child :: Nil) => + throw new SemanticException( + "Unrecognized file format in STORED AS clause:${child.getText}") + case Token("TOK_TBLRCFILE", Nil) => + tableDesc = tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) if (tableDesc.serde.isEmpty) { tableDesc = tableDesc.copy( serde = Option("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) } + case Token("TOK_TBLORCFILE", Nil) => tableDesc = tableDesc.copy( inputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"), - outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"), - serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) + if (tableDesc.serde.isEmpty) { + tableDesc = tableDesc.copy( + serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + } case Token("TOK_TBLPARQUETFILE", Nil) => tableDesc = tableDesc.copy( - inputFormat = Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), - outputFormat = Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), - serde = Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + inputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat")) + if (tableDesc.serde.isEmpty) { + tableDesc = tableDesc.copy( + 
serde = Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + } case Token("TOK_TABLESERIALIZER", Token("TOK_SERDENAME", Token(serdeName, Nil) :: otherProps) :: Nil) => @@ -627,13 +757,20 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token("TOK_TABLEPROPERTIES", list :: Nil) => tableDesc = tableDesc.copy(properties = tableDesc.properties ++ getProperties(list)) - - case _ => + case list @ Token("TOK_TABLEFILEFORMAT", _) => + tableDesc = tableDesc.copy( + inputFormat = + Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)), + outputFormat = + Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(1).getText))) + case Token("TOK_STORAGEHANDLER", _) => + throw new AnalysisException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg()) + case _ => // Unsupport features } CreateTableAsSelect(tableDesc, nodeToPlan(query), allowExisting != None) - // If its not a "CREATE TABLE AS" like above then just pass it back to hive as a native command. + // If its not a "CTAS" like above then take it as a native command case Token("TOK_CREATETABLE", _) => NativePlaceholder // Support "TRUNCATE TABLE table_name [PARTITION partition_spec]" diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala index 6bca9d0179fe3..99aa0f1ded3f8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala @@ -225,6 +225,12 @@ private[hive] class ClientWrapper( table.partitionColumns.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) table.properties.foreach { case (k, v) => qlTable.setProperty(k, v) } table.serdeProperties.foreach { case (k, v) => qlTable.setSerdeParam(k, v) } + + // set owner + qlTable.setOwner(conf.getUser) + // set create time + qlTable.setCreateTime((System.currentTimeMillis() / 1000).asInstanceOf[Int]) + version match { case hive.v12 => table.location.map(new URI(_)).foreach(u => qlTable.call[URI, Unit]("setDataLocation", u)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala index 91e6ac4032204..7d3ec12c4eb05 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.hive.execution -import org.apache.hadoop.hive.ql.plan.CreateTableDesc - import org.apache.spark.annotation.Experimental -import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.{AnalysisException, SQLContext} import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.execution.RunnableCommand @@ -29,13 +27,10 @@ import org.apache.spark.sql.hive.{HiveContext, MetastoreRelation, HiveMetastoreT /** * Create table and insert the query result into it. - * @param database the database name of the new relation - * @param tableName the table name of the new relation + * @param tableDesc the Table Describe, which may contains serde, storage handler etc. 
* @param query the query whose result will be insert into the new relation * @param allowExisting allow continue working if it's already exists, otherwise * raise exception - * @param desc the CreateTableDesc, which may contains serde, storage handler etc. - */ private[hive] case class CreateTableAsSelect( @@ -80,8 +75,7 @@ case class CreateTableAsSelect( if (allowExisting) { // table already exists, will do nothing, to keep consistent with Hive } else { - throw - new org.apache.hadoop.hive.metastore.api.AlreadyExistsException(s"$database.$tableName") + throw new AnalysisException(s"$database.$tableName already exists.") } } else { hiveContext.executePlan(InsertIntoTable(metastoreRelation, Map(), query, true, false)).toRdd diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala new file mode 100644 index 0000000000000..941a2941649b8 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive + +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.ql.session.SessionState +import org.apache.hadoop.hive.serde.serdeConstants +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.hive.client.{ManagedTable, HiveColumn, ExternalTable, HiveTable} +import org.scalatest.{BeforeAndAfterAll, FunSuite} + + +class HiveQlSuite extends FunSuite with BeforeAndAfterAll { + override def beforeAll() { + if (SessionState.get() == null) { + SessionState.start(new HiveConf()) + } + } + + private def extractTableDesc(sql: String): (HiveTable, Boolean) = { + HiveQl.createPlan(sql).collect { + case CreateTableAsSelect(desc, child, allowExisting) => (desc, allowExisting) + }.head + } + + test("Test CTAS #1") { + val s1 = + """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |(viewTime INT, + |userid BIGINT, + |page_url STRING, + |referrer_url STRING, + |ip STRING COMMENT 'IP Address of the User', + |country STRING COMMENT 'country of origination') + |COMMENT 'This is the staging page view table' + |PARTITIONED BY (dt STRING COMMENT 'date type', hour STRING COMMENT 'hour of the day') + |ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' STORED AS RCFILE + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src""".stripMargin + + val (desc, exists) = extractTableDesc(s1) + assert(exists == true) + assert(desc.specifiedDatabase == Some("mydb")) + assert(desc.name == "page_view") + assert(desc.tableType == ExternalTable) + assert(desc.location == Some("/user/external/page_view")) + assert(desc.schema == + HiveColumn("viewtime", "int", null) :: + HiveColumn("userid", "bigint", null) :: + HiveColumn("page_url", "string", null) :: + HiveColumn("referrer_url", "string", null) :: + HiveColumn("ip", "string", "IP Address of the User") :: + HiveColumn("country", "string", "country of origination") :: Nil) + // TODO will be SQLText + assert(desc.viewText == Option("This is the staging page view table")) + assert(desc.partitionColumns == + HiveColumn("dt", "string", "date type") :: + HiveColumn("hour", "string", "hour of the day") :: Nil) + assert(desc.serdeProperties == + Map((serdeConstants.SERIALIZATION_FORMAT, "\054"), (serdeConstants.FIELD_DELIM, "\054"))) + assert(desc.inputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.outputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.serde == Option("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) + assert(desc.properties == Map(("p1", "v1"), ("p2", "v2"))) + } + + test("Test CTAS #2") { + val s2 = + """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |(viewTime INT, + |userid BIGINT, + |page_url STRING, + |referrer_url STRING, + |ip STRING COMMENT 'IP Address of the User', + |country STRING COMMENT 'country of origination') + |COMMENT 'This is the staging page view table' + |PARTITIONED BY (dt STRING COMMENT 'date type', hour STRING COMMENT 'hour of the day') + |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' + | STORED AS + | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' + | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src""".stripMargin + + val (desc, exists) = extractTableDesc(s2) + assert(exists == true) + assert(desc.specifiedDatabase == Some("mydb")) + assert(desc.name == "page_view") + assert(desc.tableType == 
ExternalTable) + assert(desc.location == Some("/user/external/page_view")) + assert(desc.schema == + HiveColumn("viewtime", "int", null) :: + HiveColumn("userid", "bigint", null) :: + HiveColumn("page_url", "string", null) :: + HiveColumn("referrer_url", "string", null) :: + HiveColumn("ip", "string", "IP Address of the User") :: + HiveColumn("country", "string", "country of origination") :: Nil) + // TODO will be SQLText + assert(desc.viewText == Option("This is the staging page view table")) + assert(desc.partitionColumns == + HiveColumn("dt", "string", "date type") :: + HiveColumn("hour", "string", "hour of the day") :: Nil) + assert(desc.serdeProperties == Map()) + assert(desc.inputFormat == Option("parquet.hive.DeprecatedParquetInputFormat")) + assert(desc.outputFormat == Option("parquet.hive.DeprecatedParquetOutputFormat")) + assert(desc.serde == Option("parquet.hive.serde.ParquetHiveSerDe")) + assert(desc.properties == Map(("p1", "v1"), ("p2", "v2"))) + } + + test("Test CTAS #3") { + val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" + val (desc, exists) = extractTableDesc(s3) + assert(exists == false) + assert(desc.specifiedDatabase == None) + assert(desc.name == "page_view") + assert(desc.tableType == ManagedTable) + assert(desc.location == None) + assert(desc.schema == Seq.empty[HiveColumn]) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.serdeProperties == Map()) + assert(desc.inputFormat == Option("org.apache.hadoop.mapred.TextInputFormat")) + assert(desc.outputFormat == Option("org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat")) + assert(desc.serde.isEmpty) + assert(desc.properties == Map()) + } + + test("Test CTAS #4") { + val s4 = + """CREATE TABLE page_view + |STORED BY 'storage.handler.class.name' AS SELECT * FROM src""".stripMargin + intercept[AnalysisException] { + extractTableDesc(s4) + } + } + + test("Test CTAS #5") { + val s5 = """CREATE TABLE ctas2 + | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" + | WITH SERDEPROPERTIES("serde_p1"="p1","serde_p2"="p2") + | STORED AS RCFile + | TBLPROPERTIES("tbl_p1"="p11", "tbl_p2"="p22") + | AS + | SELECT key, value + | FROM src + | ORDER BY key, value""".stripMargin + val (desc, exists) = extractTableDesc(s5) + assert(exists == false) + assert(desc.specifiedDatabase == None) + assert(desc.name == "ctas2") + assert(desc.tableType == ManagedTable) + assert(desc.location == None) + assert(desc.schema == Seq.empty[HiveColumn]) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.serdeProperties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2"))) + assert(desc.inputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.outputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.serde == Option("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) + assert(desc.properties == Map(("tbl_p1" -> "p11"), ("tbl_p2" -> "p22"))) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 1353802604402..0d739dead4c73 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -201,7 +201,7 @@ class SQLQuerySuite extends QueryTest { var message = intercept[AnalysisException] { sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value") }.getMessage 
- assert(message.contains("Table ctas1 already exists")) + assert(message.contains("ctas1 already exists")) checkRelation("ctas1", true) sql("DROP TABLE ctas1") @@ -314,7 +314,7 @@ class SQLQuerySuite extends QueryTest { SELECT key, value FROM src ORDER BY key, value""").collect().toSeq) - intercept[org.apache.hadoop.hive.metastore.api.AlreadyExistsException] { + intercept[AnalysisException] { sql( """CREATE TABLE ctas4 AS | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect() From 4b5e1fe94c65f0feb77d849e7defa42b6007628d Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Mon, 11 May 2015 19:22:44 -0700 Subject: [PATCH 097/320] [SPARK-7437] [SQL] Fold "literal in (item1, item2, ..., literal, ...)" into true or false directly SQL ``` select key from src where 3 in (4, 5); ``` Before ``` == Optimized Logical Plan == Project [key#12] Filter 3 INSET (5,4) MetastoreRelation default, src, None ``` After ``` == Optimized Logical Plan == LocalRelation [key#228], [] ``` Author: Zhongshuai Pei <799203320@qq.com> Author: DoingDone9 <799203320@qq.com> Closes #5972 from DoingDone9/InToFalse and squashes the following commits: 4c722a2 [Zhongshuai Pei] Update predicates.scala abe2bbb [Zhongshuai Pei] Update Optimizer.scala fa461a5 [Zhongshuai Pei] Update Optimizer.scala e34c28a [Zhongshuai Pei] Update predicates.scala 24739bd [Zhongshuai Pei] Update ConstantFoldingSuite.scala f4dbf50 [Zhongshuai Pei] Update ConstantFoldingSuite.scala 35ceb7a [Zhongshuai Pei] Update Optimizer.scala 36c194e [Zhongshuai Pei] Update Optimizer.scala 2e8f6ca [Zhongshuai Pei] Update Optimizer.scala 14952e2 [Zhongshuai Pei] Merge pull request #13 from apache/master f03fe7f [Zhongshuai Pei] Merge pull request #12 from apache/master f12fa50 [Zhongshuai Pei] Merge pull request #10 from apache/master f61210c [Zhongshuai Pei] Merge pull request #9 from apache/master 34b1a9a [Zhongshuai Pei] Merge pull request #8 from apache/master 802261c [DoingDone9] Merge pull request #7 from apache/master d00303b [DoingDone9] Merge pull request #6 from apache/master 98b134f [DoingDone9] Merge pull request #5 from apache/master 161cae3 [DoingDone9] Merge pull request #4 from apache/master c87e8b6 [DoingDone9] Merge pull request #3 from apache/master cb1852d [DoingDone9] Merge pull request #2 from apache/master c3f046f [DoingDone9] Merge pull request #1 from apache/master --- .../sql/catalyst/expressions/predicates.scala | 1 + .../sql/catalyst/optimizer/Optimizer.scala | 4 +-- .../optimizer/ConstantFoldingSuite.scala | 33 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 50b0f3ee5f93f..1d72a9eb834b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -110,6 +110,7 @@ case class InSet(value: Expression, hset: Set[Any]) override def children: Seq[Expression] = value :: Nil + override def foldable: Boolean = value.foldable override def nullable: Boolean = true // TODO: Figure out correct nullability semantics of IN. 
override def toString: String = s"$value INSET ${hset.mkString("(", ",", ")")}" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index d7b2f203a6934..1ee5fb245fbb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -46,13 +46,13 @@ object DefaultOptimizer extends Optimizer { CombineLimits) :: Batch("ConstantFolding", FixedPoint(100), NullPropagation, + OptimizeIn, ConstantFolding, LikeSimplification, BooleanSimplification, SimplifyFilters, SimplifyCasts, - SimplifyCaseConversionExpressions, - OptimizeIn) :: + SimplifyCaseConversionExpressions) :: Batch("Decimal Optimizations", FixedPoint(100), DecimalAggregates) :: Batch("LocalRelation", FixedPoint(100), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 6b7d9a85c341b..5697c2272b8e8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -35,6 +35,7 @@ class ConstantFoldingSuite extends PlanTest { Batch("AnalysisNodes", Once, EliminateSubQueries) :: Batch("ConstantFolding", Once, + OptimizeIn, ConstantFolding, BooleanSimplification) :: Nil } @@ -247,4 +248,36 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("Constant folding test: Fold In(v, list) into true or false") { + var originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), Literal(2)))) + + var optimized = Optimize.execute(originalQuery.analyze) + + var correctAnswer = + testRelation + .select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + + originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), 'a.attr))) + + optimized = Optimize.execute(originalQuery.analyze) + + correctAnswer = + testRelation + .select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + } } From 028ad4bd29106747089bb9a863e9a8dee738770e Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Mon, 11 May 2015 20:04:36 -0700 Subject: [PATCH 098/320] [SPARK-7509][SQL] DataFrame.drop in Python for dropping columns. Author: Reynold Xin <rxin@databricks.com> Closes #6068 from rxin/drop-column and squashes the following commits: 9d7d5ec [Reynold Xin] [SPARK-7509][SQL] DataFrame.drop in Python for dropping columns. --- python/pyspark/sql/dataframe.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 4eaa8d9c572ff..72180f6d05fbc 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1014,7 +1014,7 @@ def withColumn(self, colName, col): @ignore_unicode_prefix def withColumnRenamed(self, existing, new): - """REturns a new :class:`DataFrame` by renaming an existing column. + """Returns a new :class:`DataFrame` by renaming an existing column. :param existing: string, name of the existing column to rename. :param col: string, new name of the column. 
@@ -1027,6 +1027,18 @@ def withColumnRenamed(self, existing, new): for c in self.columns] return self.select(*cols) + @ignore_unicode_prefix + def drop(self, colName): + """Returns a new :class:`DataFrame` that drops the specified column. + + :param colName: string, name of the column to drop. + + >>> df.drop('age').collect() + [Row(name=u'Alice'), Row(name=u'Bob')] + """ + jdf = self._jdf.drop(colName) + return DataFrame(jdf, self.sql_ctx) + def toPandas(self): """Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. From b94a93371cf219a88edee7677d22f1eaefc1ea5b Mon Sep 17 00:00:00 2001 From: Joshi <rekhajoshm@gmail.com> Date: Mon, 11 May 2015 21:02:34 -0700 Subject: [PATCH 099/320] [SPARK-7435] [SPARKR] Make DataFrame.show() consistent with that of Scala and pySpark Author: Joshi <rekhajoshm@gmail.com> Author: Rekha Joshi <rekhajoshm@gmail.com> Closes #5989 from rekhajoshm/fix/SPARK-7435 and squashes the following commits: cfc9e02 [Joshi] Spark-7435[R]: updated patch for review comments 62becc1 [Joshi] SPARK-7435: Update to DataFrame e3677c9 [Rekha Joshi] Merge pull request #1 from apache/master --- R/pkg/R/DataFrame.R | 7 ++++--- R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 8a9d2dd45c588..2705817531019 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -150,7 +150,7 @@ setMethod("isLocal", callJMethod(x@sdf, "isLocal") }) -#' ShowDF +#' showDF #' #' Print the first numRows rows of a DataFrame #' @@ -170,7 +170,8 @@ setMethod("isLocal", setMethod("showDF", signature(x = "DataFrame"), function(x, numRows = 20) { - callJMethod(x@sdf, "showString", numToInt(numRows)) + s <- callJMethod(x@sdf, "showString", numToInt(numRows)) + cat(s) }) #' show @@ -187,7 +188,7 @@ setMethod("showDF", #' sqlCtx <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- jsonFile(sqlCtx, path) -#' show(df) +#' df #'} setMethod("show", "DataFrame", function(object) { diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index dbb535e245321..99c28830c6237 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -653,7 +653,8 @@ test_that("toJSON() returns an RDD of the correct values", { test_that("showDF()", { df <- jsonFile(sqlCtx, jsonPath) - expect_output(showDF(df), "+----+-------+\n| age| name|\n+----+-------+\n|null|Michael|\n| 30| Andy|\n| 19| Justin|\n+----+-------+\n") + s <- capture.output(showDF(df)) + expect_output(s , "+----+-------+\n| age| name|\n+----+-------+\n|null|Michael|\n| 30| Andy|\n| 19| Justin|\n+----+-------+\n") }) test_that("isLocal()", { From 16696759e9a292378cbfdf695a63d6d0cff0d79a Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Mon, 11 May 2015 22:06:56 -0700 Subject: [PATCH 100/320] [SQL] Rename Dialect -> ParserDialect. Author: Reynold Xin <rxin@databricks.com> Closes #6071 from rxin/parserdialect and squashes the following commits: ca2eb31 [Reynold Xin] Rename Dialect -> ParserDialect. 
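After this rename, a custom SQL dialect is still just a subclass of the abstract `ParserDialect` shown in the diff below, with a single `parse(sqlText)` method that returns a `LogicalPlan`. A minimal sketch of such a subclass follows; the class name, package, and the trimming step are assumptions for illustration only (they are not part of this patch), and it simply delegates to the stock Catalyst parser rather than implementing a new grammar.

```scala
import org.apache.spark.sql.catalyst
import org.apache.spark.sql.catalyst.ParserDialect
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// Hypothetical dialect: strip surrounding whitespace, then defer to the
// built-in Catalyst SQL parser (assumed to be accessible from this package).
class TrimmingParserDialect extends ParserDialect {
  @transient protected val sqlParser = new catalyst.SqlParser

  override def parse(sqlText: String): LogicalPlan = sqlParser.parse(sqlText.trim)
}
```

As the `dialectClassName` logic in the diff below shows, such a class would be picked up by setting `spark.sql.dialect` to its fully qualified name.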
--- .../catalyst/{Dialect.scala => ParserDialect.scala} | 2 +- .../main/scala/org/apache/spark/sql/SQLContext.scala | 12 ++++++------ .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++-- .../org/apache/spark/sql/hive/HiveContext.scala | 4 ++-- .../spark/sql/hive/execution/SQLQuerySuite.scala | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/{Dialect.scala => ParserDialect.scala} (97%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala similarity index 97% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala index 977003493d471..05a92b06f9fd9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan * */ @DeveloperApi -abstract class Dialect { +abstract class ParserDialect { // this is the main function that will be implemented by sql parser. def parse(sqlText: String): LogicalPlan } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 28fc9d04436f7..648021806f3ee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.catalyst.optimizer.{DefaultOptimizer, Optimizer} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.catalyst.Dialect +import org.apache.spark.sql.catalyst.ParserDialect import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, expressions} import org.apache.spark.sql.execution.{Filter, _} import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} @@ -49,7 +49,7 @@ import org.apache.spark.{Partition, SparkContext} /** * Currently we support the default dialect named "sql", associated with the class - * [[DefaultDialect]] + * [[DefaultParserDialect]] * * And we can also provide custom SQL Dialect, for example in Spark SQL CLI: * {{{ @@ -74,7 +74,7 @@ import org.apache.spark.{Partition, SparkContext} *-- "hiveql" (for HiveContext) * }}} */ -private[spark] class DefaultDialect extends Dialect { +private[spark] class DefaultParserDialect extends ParserDialect { @transient protected val sqlParser = new catalyst.SqlParser @@ -176,10 +176,10 @@ class SQLContext(@transient val sparkContext: SparkContext) @transient protected[sql] val sqlParser = new SparkSQLParser(getSQLDialect().parse(_)) - protected[sql] def getSQLDialect(): Dialect = { + protected[sql] def getSQLDialect(): ParserDialect = { try { val clazz = Utils.classForName(dialectClassName) - clazz.newInstance().asInstanceOf[Dialect] + clazz.newInstance().asInstanceOf[ParserDialect] } catch { case NonFatal(e) => // Since we didn't find the available SQL Dialect, it will fail even for SET command: @@ -209,7 +209,7 @@ class SQLContext(@transient val sparkContext: SparkContext) protected[sql] val defaultSession = createSession() protected[sql] def dialectClassName = if (conf.dialect == "sql") 
{ - classOf[DefaultDialect].getCanonicalName + classOf[DefaultParserDialect].getCanonicalName } else { conf.dialect } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b44eb223c80c5..ec0e76cde6f7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.test.TestSQLContext.{udf => _, _} import org.apache.spark.sql.types._ /** A SQL Dialect for testing purpose, and it can not be nested type */ -class MyDialect extends DefaultDialect +class MyDialect extends DefaultParserDialect class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { // Make sure the tables are loaded. @@ -94,7 +94,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { newContext.sql("SELECT 1") } // test if the dialect set back to DefaultSQLDialect - assert(newContext.getSQLDialect().getClass === classOf[DefaultDialect]) + assert(newContext.getSQLDialect().getClass === classOf[DefaultParserDialect]) } test("SPARK-4625 support SORT BY in SimpleSQLParser & DSL") { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 3bab648e31d1f..61e8c154e8c3c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -22,7 +22,7 @@ import java.sql.Timestamp import java.util.{ArrayList => JArrayList} import org.apache.hadoop.hive.ql.parse.VariableSubstitution -import org.apache.spark.sql.catalyst.Dialect +import org.apache.spark.sql.catalyst.ParserDialect import scala.collection.JavaConversions._ import scala.language.implicitConversions @@ -54,7 +54,7 @@ import org.apache.spark.util.Utils /** * This is the HiveQL Dialect, this dialect is strongly bind with HiveContext */ -private[hive] class HiveQLDialect extends Dialect { +private[hive] class HiveQLDialect extends ParserDialect { override def parse(sqlText: String): LogicalPlan = { HiveQl.parseSql(sqlText) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 0d739dead4c73..a5744ccc68a47 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries import org.apache.spark.sql.catalyst.errors.DialectException -import org.apache.spark.sql.DefaultDialect +import org.apache.spark.sql.DefaultParserDialect import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SQLConf} import org.apache.spark.sql.hive.MetastoreRelation import org.apache.spark.sql.hive.test.TestHive @@ -53,7 +53,7 @@ case class WindowData( area: String, product: Int) /** A SQL Dialect for testing purpose, and it can not be nested type */ -class MyDialect extends DefaultDialect +class MyDialect extends DefaultParserDialect /** * A collection of hive query tests where we generate the answers ourselves instead of depending on @@ -247,7 +247,7 @@ class SQLQuerySuite extends QueryTest { // set the dialect back to the DefaultSQLDialect sql("SET spark.sql.dialect=sql") - assert(getSQLDialect().getClass === 
classOf[DefaultDialect]) + assert(getSQLDialect().getClass === classOf[DefaultParserDialect]) sql("SET spark.sql.dialect=hiveql") assert(getSQLDialect().getClass === classOf[HiveQLDialect]) From 640f63b959f936ac142ce429262ffc3db4536619 Mon Sep 17 00:00:00 2001 From: vidmantas zemleris <vidmantas@vinted.com> Date: Mon, 11 May 2015 22:29:24 -0700 Subject: [PATCH 101/320] [SPARK-6994][SQL] Update docs for fetching Row fields by name add docs for https://issues.apache.org/jira/browse/SPARK-6994 Author: vidmantas zemleris <vidmantas@vinted.com> Closes #6030 from vidma/docs/row-with-named-fields and squashes the following commits: 241b401 [vidmantas zemleris] [SPARK-6994][SQL] Update docs for fetching Row fields by name --- docs/sql-programming-guide.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 6b7b867ea6b5d..78b8e8ad515a0 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -367,11 +367,18 @@ val people = sc.textFile("examples/src/main/resources/people.txt").map(_.split(" people.registerTempTable("people") // SQL statements can be run by using the sql methods provided by sqlContext. -val teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") +val teenagers = sqlContext.sql("SELECT name, age FROM people WHERE age >= 13 AND age <= 19") // The results of SQL queries are DataFrames and support all the normal RDD operations. -// The columns of a row in the result can be accessed by ordinal. +// The columns of a row in the result can be accessed by field index: teenagers.map(t => "Name: " + t(0)).collect().foreach(println) + +// or by field name: +teenagers.map(t => "Name: " + t.getAs[String]("name")).collect().foreach(println) + +// row.getValuesMap[T] retrieves multiple columns at once into a Map[String, T] +teenagers.map(_.getValuesMap[Any](List("name", "age"))).collect().foreach(println) +// Map("name" -> "Justin", "age" -> 19) {% endhighlight %} </div> @@ -538,7 +545,7 @@ peopleDataFrame.registerTempTable("people") val results = sqlContext.sql("SELECT name FROM people") // The results of SQL queries are DataFrames and support all the normal RDD operations. -// The columns of a row in the result can be accessed by ordinal. +// The columns of a row in the result can be accessed by field index or by field name. results.map(t => "Name: " + t(0)).collect().foreach(println) {% endhighlight %} From 8a4edecc483490da4fce406ac3b401860834ac1e Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Tue, 12 May 2015 01:34:33 -0700 Subject: [PATCH 102/320] [SPARK-7534] [CORE] [WEBUI] Fix the Stage table when a stage is missing Just improved the Stage table when a stage is missing. 
Before: ![screen shot 2015-05-11 at 10 11 51 am](https://cloud.githubusercontent.com/assets/1000778/7570842/2ba37380-f7c8-11e4-9b5f-cf1a6264b2a4.png) After: ![screen shot 2015-05-11 at 10 26 09 am](https://cloud.githubusercontent.com/assets/1000778/7570848/33703152-f7c8-11e4-81a8-d53dd72d7b8d.png) Author: zsxwing <zsxwing@gmail.com> Closes #6061 from zsxwing/SPARK-7534 and squashes the following commits: 09fe862 [zsxwing] Leave it blank rather than '-' 6299197 [zsxwing] Fix the Stage table when a stage is missing --- .../org/apache/spark/ui/jobs/StageTable.scala | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala index a33243d4252bf..82ba561eefb16 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala @@ -121,10 +121,23 @@ private[ui] class StageTableBase( <div>{stageDesc.getOrElse("")} {killLink} {nameLink} {details}</div> } + protected def missingStageRow(stageId: Int): Seq[Node] = { + <td>{stageId}</td> ++ + {if (isFairScheduler) {<td>-</td>} else Seq.empty} ++ + <td>No data available for this stage</td> ++ // Description + <td></td> ++ // Submitted + <td></td> ++ // Duration + <td></td> ++ // Tasks: Succeeded/Total + <td></td> ++ // Input + <td></td> ++ // Output + <td></td> ++ // Shuffle Read + <td></td> // Shuffle Write + } + protected def stageRow(s: StageInfo): Seq[Node] = { val stageDataOption = listener.stageIdToData.get((s.stageId, s.attemptId)) if (stageDataOption.isEmpty) { - return <td>{s.stageId}</td><td>No data available for this stage</td> + return missingStageRow(s.stageId) } val stageData = stageDataOption.get From 984787526625b4ef8a1635faf7a5ac3cb0b758b7 Mon Sep 17 00:00:00 2001 From: linweizhong <linweizhong@huawei.com> Date: Tue, 12 May 2015 01:36:27 -0700 Subject: [PATCH 103/320] [MINOR] [PYSPARK] Set PYTHONPATH to python/lib/pyspark.zip rather than python/pyspark As PR #5580 we have created pyspark.zip on building and set PYTHONPATH to python/lib/pyspark.zip, so to keep consistence update this. 
Author: linweizhong <linweizhong@huawei.com> Closes #6047 from Sephiroth-Lin/pyspark_pythonpath and squashes the following commits: 8cc3d96 [linweizhong] Set PYTHONPATH to python/lib/pyspark.zip rather than python/pyspark as PR#5580 we have create pyspark.zip on build --- .../main/scala/org/apache/spark/api/python/PythonUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index acbaba6791850..9eff0a230cbe2 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -31,7 +31,7 @@ private[spark] object PythonUtils { def sparkPythonPath: String = { val pythonPath = new ArrayBuffer[String] for (sparkHome <- sys.env.get("SPARK_HOME")) { - pythonPath += Seq(sparkHome, "python").mkString(File.separator) + pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator) pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.8.2.1-src.zip").mkString(File.separator) } pythonPath ++= SparkContext.jarOfObject(this) From 82e890fb19d6fbaffa69856eecb4699f2f8a81eb Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin <vanzin@cloudera.com> Date: Tue, 12 May 2015 01:39:21 -0700 Subject: [PATCH 104/320] [SPARK-7485] [BUILD] Remove pyspark files from assembly. The sbt part of the build is hacky; it basically tricks sbt into generating the zip by using a generator, but returns an empty list for the generated files so that nothing is actually added to the assembly. Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #6022 from vanzin/SPARK-7485 and squashes the following commits: 22c1e04 [Marcelo Vanzin] Remove unneeded code. 4893622 [Marcelo Vanzin] [SPARK-7485] [build] Remove pyspark files from assembly. 
--- core/pom.xml | 47 ---------------------------------------- mllib/pom.xml | 11 ---------- project/SparkBuild.scala | 44 +++---------------------------------- sql/core/pom.xml | 8 ------- streaming/pom.xml | 8 ------- 5 files changed, 3 insertions(+), 115 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index fc42f48973fe9..262a3320db106 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -381,35 +381,6 @@ <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory> <plugins> - <!-- Unzip py4j so we can include its files in the jar --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-antrun-plugin</artifactId> - <executions> - <execution> - <phase>generate-resources</phase> - <goals> - <goal>run</goal> - </goals> - </execution> - </executions> - <configuration> - <target> - <unzip src="../python/lib/py4j-0.8.2.1-src.zip" dest="../python/build" /> - </target> - </configuration> - </plugin> - <plugin> - <artifactId>maven-clean-plugin</artifactId> - <configuration> - <filesets> - <fileset> - <directory>${basedir}/../python/build</directory> - </fileset> - </filesets> - <verbose>true</verbose> - </configuration> - </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-dependency-plugin</artifactId> @@ -438,24 +409,6 @@ </executions> </plugin> </plugins> - - <resources> - <resource> - <directory>src/main/resources</directory> - </resource> - <resource> - <directory>../python</directory> - <includes> - <include>pyspark/*.py</include> - </includes> - </resource> - <resource> - <directory>../python/build</directory> - <includes> - <include>py4j/*.py</include> - </includes> - </resource> - </resources> </build> <profiles> diff --git a/mllib/pom.xml b/mllib/pom.xml index a3c57ae26000b..0c07ca1a62fd3 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -141,16 +141,5 @@ <build> <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory> - <resources> - <resource> - <directory>../python</directory> - <includes> - <include>pyspark/mllib/*.py</include> - <include>pyspark/mllib/stat/*.py</include> - <include>pyspark/ml/*.py</include> - <include>pyspark/ml/param/*.py</include> - </includes> - </resource> - </resources> </build> </project> diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 186345af0e60e..1b87e4e98bd83 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -168,7 +168,7 @@ object SparkBuild extends PomBuild { /* Enable Assembly for all assembly projects */ assemblyProjects.foreach(enable(Assembly.settings)) - /* Package pyspark artifacts in the main assembly. */ + /* Package pyspark artifacts in a separate zip file for YARN. */ enable(PySparkAssembly.settings)(assembly) /* Enable unidoc only for the root spark project */ @@ -373,22 +373,15 @@ object PySparkAssembly { import java.util.zip.{ZipOutputStream, ZipEntry} lazy val settings = Seq( - unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" }, // Use a resource generator to copy all .py files from python/pyspark into a managed directory // to be included in the assembly. We can't just add "python/" to the assembly's resource dir // list since that will copy unneeded / unwanted files. 
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File => val src = new File(BuildCommons.sparkHome, "python/pyspark") - val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip") zipFile.delete() zipRecursive(src, zipFile) - - val dst = new File(outDir, "pyspark") - if (!dst.isDirectory()) { - require(dst.mkdirs()) - } - copy(src, dst) + Seq[File]() } ) @@ -416,42 +409,11 @@ object PySparkAssembly { output.write(buf, 0, n) } } + output.closeEntry() in.close() } } - private def copy(src: File, dst: File): Seq[File] = { - src.listFiles().flatMap { f => - val child = new File(dst, f.getName()) - if (f.isDirectory()) { - child.mkdir() - copy(f, child) - } else if (f.getName().endsWith(".py")) { - var in: Option[FileInputStream] = None - var out: Option[FileOutputStream] = None - try { - in = Some(new FileInputStream(f)) - out = Some(new FileOutputStream(child)) - - val bytes = new Array[Byte](1024) - var read = 0 - while (read >= 0) { - read = in.get.read(bytes) - if (read > 0) { - out.get.write(bytes, 0, read) - } - } - - Some(child) - } finally { - in.foreach(_.close()) - out.foreach(_.close()) - } - } else { - None - } - } - } } object Unidoc { diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 7d274a73e079f..ffe95bb49188f 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -103,13 +103,5 @@ <build> <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory> - <resources> - <resource> - <directory>../../python</directory> - <includes> - <include>pyspark/sql/*.py</include> - </includes> - </resource> - </resources> </build> </project> diff --git a/streaming/pom.xml b/streaming/pom.xml index 5ca55a4f680bb..5ab7f4472c38b 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -105,13 +105,5 @@ </configuration> </plugin> </plugins> - <resources> - <resource> - <directory>../python</directory> - <includes> - <include>pyspark/streaming/*.py</include> - </includes> - </resource> - </resources> </build> </project> From f3e8e60063ccf0d713d03e671a3231560475f90d Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Tue, 12 May 2015 01:40:55 -0700 Subject: [PATCH 105/320] [SPARK-7467] Dag visualization: treat checkpoint as an RDD operation Such that a checkpoint RDD does not go into random scopes on the UI, e.g. `take`. We've seen this in streaming. Author: Andrew Or <andrew@databricks.com> Closes #6004 from andrewor14/dag-viz-checkpoint and squashes the following commits: 9217439 [Andrew Or] Fix checkpoints 4ae8806 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-checkpoint 19bc07b [Andrew Or] Treat checkpoint as an RDD operation --- .../main/scala/org/apache/spark/rdd/RDD.scala | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 7dad30ecbdd2f..02a94baf372d9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1523,13 +1523,15 @@ abstract class RDD[T: ClassTag]( * has completed (therefore the RDD has been materialized and potentially stored in memory). * doCheckpoint() is called recursively on the parent RDDs. 
*/ - private[spark] def doCheckpoint() { - if (!doCheckpointCalled) { - doCheckpointCalled = true - if (checkpointData.isDefined) { - checkpointData.get.doCheckpoint() - } else { - dependencies.foreach(_.rdd.doCheckpoint()) + private[spark] def doCheckpoint(): Unit = { + RDDOperationScope.withScope(sc, "checkpoint", false, true) { + if (!doCheckpointCalled) { + doCheckpointCalled = true + if (checkpointData.isDefined) { + checkpointData.get.doCheckpoint() + } else { + dependencies.foreach(_.rdd.doCheckpoint()) + } } } } From ec6f2a9774167014566fb9608ee4394d2ce5fd6a Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Tue, 12 May 2015 08:48:24 -0700 Subject: [PATCH 106/320] [SPARK-7532] [STREAMING] StreamingContext.start() made to logWarning and not throw exception Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6060 from tdas/SPARK-7532 and squashes the following commits: 6fe2e83 [Tathagata Das] Update docs 7dadfc3 [Tathagata Das] Fixed bug again 99c7678 [Tathagata Das] Added logInfo 65aec20 [Tathagata Das] Fix bug 5bf031b [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7532 1a9a818 [Tathagata Das] Fix scaladoc c584313 [Tathagata Das] StreamingContext.start() made to logWarning and not throw exception --- .../spark/streaming/StreamingContext.scala | 27 +++++++++---------- .../streaming/StreamingContextSuite.scala | 4 +-- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 2c5834defa8c8..8461e901208ff 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -528,28 +528,27 @@ class StreamingContext private[streaming] ( /** * Start the execution of the streams. * - * @throws SparkException if the context has already been started or stopped. + * @throws SparkException if the StreamingContext is already stopped. 
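 * Calling `start()` on a context that is already ACTIVE is a no-op that only logs a warning,
 * so the following is now safe:
 * {{{
 *   ssc.start()
 *   ssc.start()   // logs "StreamingContext has already been started"; state stays ACTIVE
 * }}}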
*/ def start(): Unit = synchronized { - import StreamingContext._ state match { case INITIALIZED => - // good to start + validate() + startSite.set(DStream.getCreationSite()) + sparkContext.setCallSite(startSite.get) + StreamingContext.ACTIVATION_LOCK.synchronized { + StreamingContext.assertNoOtherContextIsActive() + scheduler.start() + uiTab.foreach(_.attach()) + state = StreamingContextState.ACTIVE + StreamingContext.setActiveContext(this) + } + logInfo("StreamingContext started") case ACTIVE => - throw new SparkException("StreamingContext has already been started") + logWarning("StreamingContext has already been started") case STOPPED => throw new SparkException("StreamingContext has already been stopped") } - validate() - startSite.set(DStream.getCreationSite()) - sparkContext.setCallSite(startSite.get) - ACTIVATION_LOCK.synchronized { - assertNoOtherContextIsActive() - scheduler.start() - uiTab.foreach(_.attach()) - state = StreamingContextState.ACTIVE - setActiveContext(this) - } } /** diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index b8247db7e8950..47299513de095 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -136,9 +136,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w addInputStream(ssc).register() ssc.start() assert(ssc.getState() === StreamingContextState.ACTIVE) - intercept[SparkException] { - ssc.start() - } + ssc.start() assert(ssc.getState() === StreamingContextState.ACTIVE) } From d86ce845840a92b4dde7975082738ed94ab8c570 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang <daoyuan.wang@intel.com> Date: Tue, 12 May 2015 10:23:41 -0700 Subject: [PATCH 107/320] [SPARK-6876] [PySpark] [SQL] add DataFrame na.replace in pyspark Author: Daoyuan Wang <daoyuan.wang@intel.com> Closes #6003 from adrian-wang/pynareplace and squashes the following commits: 672efba [Daoyuan Wang] remove py2.7 feature 4a148f7 [Daoyuan Wang] to_replace support dict, value support single value, and add full tests 9e232e7 [Daoyuan Wang] rename scala map af0268a [Daoyuan Wang] remove na 63ac579 [Daoyuan Wang] add na.replace in pyspark --- .../apache/spark/api/python/PythonUtils.scala | 7 ++ python/pyspark/sql/dataframe.py | 85 +++++++++++++++++++ python/pyspark/sql/tests.py | 48 +++++++++++ 3 files changed, 140 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 9eff0a230cbe2..efb6b93cfc35d 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -53,4 +53,11 @@ private[spark] object PythonUtils { def toSeq[T](cols: JList[T]): Seq[T] = { cols.toList.toSeq } + + /** + * Convert java map of K, V into Map of K, V (for calling API with varargs) + */ + def toScalaMap[K, V](jm: java.util.Map[K, V]): Map[K, V] = { + jm.toMap + } } diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 72180f6d05fbc..078acfdf7e2df 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -578,6 +578,10 @@ def _jseq(self, cols, converter=None): """Return a JVM Seq of Columns from a list of Column or names""" return _to_seq(self.sql_ctx._sc, cols, converter) + def _jmap(self, jm): + 
"""Return a JVM Scala Map from a dict""" + return _to_scala_map(self.sql_ctx._sc, jm) + def _jcols(self, *cols): """Return a JVM Seq of Columns from a list of Column or column names @@ -924,6 +928,80 @@ def fillna(self, value, subset=None): return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sql_ctx) + def replace(self, to_replace, value, subset=None): + """Returns a new :class:`DataFrame` replacing a value with another value. + + :param to_replace: int, long, float, string, or list. + Value to be replaced. + If the value is a dict, then `value` is ignored and `to_replace` must be a + mapping from column name (string) to replacement value. The value to be + replaced must be an int, long, float, or string. + :param value: int, long, float, string, or list. + Value to use to replace holes. + The replacement value must be an int, long, float, or string. If `value` is a + list or tuple, `value` should be of the same length with `to_replace`. + :param subset: optional list of column names to consider. + Columns specified in subset that do not have matching data type are ignored. + For example, if `value` is a string, and subset contains a non-string column, + then the non-string column is simply ignored. + >>> df4.replace(10, 20).show() + +----+------+-----+ + | age|height| name| + +----+------+-----+ + | 20| 80|Alice| + | 5| null| Bob| + |null| null| Tom| + |null| null| null| + +----+------+-----+ + + >>> df4.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show() + +----+------+----+ + | age|height|name| + +----+------+----+ + | 10| 80| A| + | 5| null| B| + |null| null| Tom| + |null| null|null| + +----+------+----+ + """ + if not isinstance(to_replace, (float, int, long, basestring, list, tuple, dict)): + raise ValueError( + "to_replace should be a float, int, long, string, list, tuple, or dict") + + if not isinstance(value, (float, int, long, basestring, list, tuple)): + raise ValueError("value should be a float, int, long, string, list, or tuple") + + rep_dict = dict() + + if isinstance(to_replace, (float, int, long, basestring)): + to_replace = [to_replace] + + if isinstance(to_replace, tuple): + to_replace = list(to_replace) + + if isinstance(value, tuple): + value = list(value) + + if isinstance(to_replace, list) and isinstance(value, list): + if len(to_replace) != len(value): + raise ValueError("to_replace and value lists should be of the same length") + rep_dict = dict(zip(to_replace, value)) + elif isinstance(to_replace, list) and isinstance(value, (float, int, long, basestring)): + rep_dict = dict([(tr, value) for tr in to_replace]) + elif isinstance(to_replace, dict): + rep_dict = to_replace + + if subset is None: + return DataFrame(self._jdf.na().replace('*', rep_dict), self.sql_ctx) + elif isinstance(subset, basestring): + subset = [subset] + + if not isinstance(subset, (list, tuple)): + raise ValueError("subset should be a list or tuple of column names") + + return DataFrame( + self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), self.sql_ctx) + def corr(self, col1, col2, method=None): """ Calculates the correlation of two columns of a DataFrame as a double value. Currently only @@ -1226,6 +1304,13 @@ def _to_seq(sc, cols, converter=None): return sc._jvm.PythonUtils.toSeq(cols) +def _to_scala_map(sc, jm): + """ + Convert a dict into a JVM Map. 
+ """ + return sc._jvm.PythonUtils.toScalaMap(jm) + + def _unary_op(name, doc="unary operator"): """ Create a method for given unary operator """ def _(self): diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 7e63f4d6461f6..1922d03af61da 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -665,6 +665,54 @@ def test_bitwise_operations(self): result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict() self.assertEqual(~75, result['~b']) + def test_replace(self): + schema = StructType([ + StructField("name", StringType(), True), + StructField("age", IntegerType(), True), + StructField("height", DoubleType(), True)]) + + # replace with int + row = self.sqlCtx.createDataFrame([(u'Alice', 10, 10.0)], schema).replace(10, 20).first() + self.assertEqual(row.age, 20) + self.assertEqual(row.height, 20.0) + + # replace with double + row = self.sqlCtx.createDataFrame( + [(u'Alice', 80, 80.0)], schema).replace(80.0, 82.1).first() + self.assertEqual(row.age, 82) + self.assertEqual(row.height, 82.1) + + # replace with string + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 80.1)], schema).replace(u'Alice', u'Ann').first() + self.assertEqual(row.name, u"Ann") + self.assertEqual(row.age, 10) + + # replace with subset specified by a string of a column name w/ actual change + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 80.1)], schema).replace(10, 20, subset='age').first() + self.assertEqual(row.age, 20) + + # replace with subset specified by a string of a column name w/o actual change + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 80.1)], schema).replace(10, 20, subset='height').first() + self.assertEqual(row.age, 10) + + # replace with subset specified with one column replaced, another column not in subset + # stays unchanged. + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 10.0)], schema).replace(10, 20, subset=['name', 'age']).first() + self.assertEqual(row.name, u'Alice') + self.assertEqual(row.age, 20) + self.assertEqual(row.height, 10.0) + + # replace with subset specified but no column will be replaced + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, None)], schema).replace(10, 20, subset=['name', 'height']).first() + self.assertEqual(row.name, u'Alice') + self.assertEqual(row.age, 10) + self.assertEqual(row.height, None) + class HiveContextSQLTests(ReusedPySparkTestCase): From 831504cf6bde6b1131005d5552e56a842725c84c Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Tue, 12 May 2015 10:28:40 -0700 Subject: [PATCH 108/320] [DataFrame][minor] cleanup unapply methods in DataTypes Author: Wenchen Fan <cloud0fan@outlook.com> Closes #6079 from cloud-fan/unapply and squashes the following commits: 40da442 [Wenchen Fan] one more 7d90a05 [Wenchen Fan] cleanup unapply in DataTypes --- .../org/apache/spark/sql/types/DataType.scala | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 0992a7c311ee2..a0b261649f66f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -47,10 +47,7 @@ abstract class DataType { * ... 
* }}} */ - private[sql] def unapply(a: Expression): Boolean = a match { - case e: Expression if e.dataType == this => true - case _ => false - } + private[sql] def unapply(e: Expression): Boolean = e.dataType == this /** * The default size of a value of this data type, used internally for size estimation. @@ -137,10 +134,7 @@ private[sql] object IntegralType { * ... * }}} */ - def unapply(a: Expression): Boolean = a match { - case e: Expression if e.dataType.isInstanceOf[IntegralType] => true - case _ => false - } + def unapply(e: Expression): Boolean = e.dataType.isInstanceOf[IntegralType] } @@ -157,10 +151,7 @@ private[sql] object FractionalType { * ... * }}} */ - def unapply(a: Expression): Boolean = a match { - case e: Expression if e.dataType.isInstanceOf[FractionalType] => true - case _ => false - } + def unapply(e: Expression): Boolean = e.dataType.isInstanceOf[FractionalType] } From 0595b6de8f1da04baceda082553c2aa1aa2cb006 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Wed, 13 May 2015 01:32:28 +0800 Subject: [PATCH 109/320] [SPARK-3928] [SPARK-5182] [SQL] Partitioning support for the data sources API This PR adds partitioning support for the external data sources API. It aims to simplify development of file system based data sources, and provide first class partitioning support for both read path and write path. Existing data sources like JSON and Parquet can be simplified with this work. ## New features provided 1. Hive compatible partition discovery This actually generalizes the partition discovery strategy used in Parquet data source in Spark 1.3.0. 1. Generalized partition pruning optimization Now partition pruning is handled during physical planning phase. Specific data sources don't need to worry about this harness anymore. (This also implies that we can remove `CatalystScan` after migrating the Parquet data source, since now we don't need to pass Catalyst expressions to data source implementations.) 1. Insertion with dynamic partitions When inserting data to a `FSBasedRelation`, data can be partitioned dynamically by specified partition columns. ## New structures provided ### Developer API 1. `FSBasedRelation` Base abstract class for file system based data sources. 1. `OutputWriter` Base abstract class for output row writers, responsible for writing a single row object. 1. `FSBasedRelationProvider` A new relation provider for `FSBasedRelation` subclasses. Note that data sources extending `FSBasedRelation` don't need to extend `RelationProvider` and `SchemaRelationProvider`. ### User API New overloaded versions of 1. `DataFrame.save()` 1. `DataFrame.saveAsTable()` 1. `SQLContext.load()` are provided to allow users to save/load DataFrames with user defined dynamic partition columns. ### Spark SQL query planning 1. `InsertIntoFSBasedRelation` Used to implement write path for `FSBasedRelation`s. 1. New rules for `FSBasedRelation` in `DataSourceStrategy` These are added to hook `FSBasedRelation` into physical query plan in read path, and perform partition pruning. ## TODO - [ ] Use scratch directories when overwriting a table with data selected from itself. Currently, this is not supported, because the table been overwritten is always deleted before writing any data to it. - [ ] When inserting with dynamic partition columns, use external sorter to group the data first. This ensures that we only need to open a single `OutputWriter` at a time. For data sources like Parquet, `OutputWriter`s can be quite memory consuming. 
One issue is that, this approach breaks the row distribution in the original DataFrame. However, we did't promise to preserve data distribution when writing a DataFrame. - [x] More tests. Specifically, test cases for - [x] Self-join - [x] Loading partitioned relations with a subset of partition columns stored in data files. - [x] `SQLContext.load()` with user defined dynamic partition columns. ## Parquet data source migration Parquet data source migration is covered in PR https://github.com/liancheng/spark/pull/6, which is against this PR branch and for preview only. A formal PR need to be made after this one is merged. Author: Cheng Lian <lian@databricks.com> Closes #5526 from liancheng/partitioning-support and squashes the following commits: 5351a1b [Cheng Lian] Fixes compilation error introduced while rebasing 1f9b1a5 [Cheng Lian] Tweaks data schema passed to FSBasedRelations 43ba50e [Cheng Lian] Avoids serializing generated projection code edf49e7 [Cheng Lian] Removed commented stale code block 348a922 [Cheng Lian] Adds projection in FSBasedRelation.buildScan(requiredColumns, inputPaths) ad4d4de [Cheng Lian] Enables HDFS style globbing 8d12e69 [Cheng Lian] Fixes compilation error c71ac6c [Cheng Lian] Addresses comments from @marmbrus 7552168 [Cheng Lian] Fixes typo in MimaExclude.scala 0349e09 [Cheng Lian] Fixes compilation error introduced while rebasing 52b0c9b [Cheng Lian] Adjusts project/MimaExclude.scala c466de6 [Cheng Lian] Addresses comments bc3f9b4 [Cheng Lian] Uses projection to separate partition columns and data columns while inserting rows 795920a [Cheng Lian] Fixes compilation error after rebasing 0b8cd70 [Cheng Lian] Adds Scala/Catalyst row conversion when writing non-partitioned tables fa543f3 [Cheng Lian] Addresses comments 5849dd0 [Cheng Lian] Fixes doc typos. Fixes partition discovery refresh. 51be443 [Cheng Lian] Replaces FSBasedRelation.outputCommitterClass with FSBasedRelation.prepareForWrite c4ed4fe [Cheng Lian] Bug fixes and a new test suite a29e663 [Cheng Lian] Bug fix: should only pass actuall data files to FSBaseRelation.buildScan 5f423d3 [Cheng Lian] Bug fixes. 
Lets data source to customize OutputCommitter rather than OutputFormat 54c3d7b [Cheng Lian] Enforces that FileOutputFormat must be used be0c268 [Cheng Lian] Uses TaskAttempContext rather than Configuration in OutputWriter.init 0bc6ad1 [Cheng Lian] Resorts to new Hadoop API, and now FSBasedRelation can customize output format class f320766 [Cheng Lian] Adds prepareForWrite() hook, refactored writer containers 422ff4a [Cheng Lian] Fixes style issue ce52353 [Cheng Lian] Adds new SQLContext.load() overload with user defined dynamic partition columns 8d2ff71 [Cheng Lian] Merges partition columns when reading partitioned relations ca1805b [Cheng Lian] Removes duplicated partition discovery code in new Parquet f18dec2 [Cheng Lian] More strict schema checking b746ab5 [Cheng Lian] More tests 9b487bf [Cheng Lian] Fixes compilation errors introduced while rebasing ea6c8dd [Cheng Lian] Removes remote debugging stuff 327bb1d [Cheng Lian] Implements partitioning support for data sources API 3c5073a [Cheng Lian] Fixes SaveModes used in test cases fb5a607 [Cheng Lian] Fixes compilation error 9d17607 [Cheng Lian] Adds the contract that OutputWriter should have zero-arg constructor 5de194a [Cheng Lian] Forgot Apache licence header 95d0b4d [Cheng Lian] Renames PartitionedSchemaRelationProvider to FSBasedRelationProvider 770b5ba [Cheng Lian] Adds tests for FSBasedRelation 3ba9bbf [Cheng Lian] Adds DataFrame.saveAsTable() overrides which support partitioning 1b8231f [Cheng Lian] Renames FSBasedPrunedFilteredScan to FSBasedRelation aa8ba9a [Cheng Lian] Javadoc fix 012ed2d [Cheng Lian] Adds PartitioningOptions 7dd8dd5 [Cheng Lian] Adds new interfaces and stub methods for data sources API partitioning support --- .../apache/spark/deploy/SparkHadoopUtil.scala | 52 +- project/MimaExcludes.scala | 14 +- .../org/apache/spark/sql/DataFrame.scala | 107 +++- .../scala/org/apache/spark/sql/SQLConf.scala | 6 + .../org/apache/spark/sql/SQLContext.scala | 36 +- .../spark/sql/execution/SparkStrategies.scala | 10 +- .../apache/spark/sql/parquet/newParquet.scala | 182 +----- .../sql/sources/DataSourceStrategy.scala | 191 ++++++- .../spark/sql/sources/PartitioningUtils.scala | 207 +++++++ .../apache/spark/sql/sources/commands.scala | 406 +++++++++++++- .../org/apache/spark/sql/sources/ddl.scala | 90 ++- .../apache/spark/sql/sources/interfaces.scala | 283 +++++++++- .../org/apache/spark/sql/sources/rules.scala | 8 +- .../ParquetPartitionDiscoverySuite.scala | 3 +- .../sources/CreateTableAsSelectSuite.scala | 4 +- .../spark/sql/hive/HiveMetastoreCatalog.scala | 13 +- .../spark/sql/hive/HiveStrategies.scala | 6 +- .../spark/sql/hive/execution/commands.scala | 27 +- .../spark/sql/hive/hiveWriterContainers.scala | 2 +- .../sql/sources/FSBasedRelationSuite.scala | 525 ++++++++++++++++++ .../sql/sources/SimpleTextRelation.scala | 125 +++++ 21 files changed, 2042 insertions(+), 255 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index b563034457a91..7fa75ac8c2b54 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -22,22 +22,22 @@ import 
java.lang.reflect.Method import java.security.PrivilegedExceptionAction import java.util.{Arrays, Comparator} +import scala.collection.JavaConversions._ +import scala.concurrent.duration._ +import scala.language.postfixOps + import com.google.common.primitives.Longs import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter} import org.apache.hadoop.fs.FileSystem.Statistics +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter} import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce.JobContext import org.apache.hadoop.security.{Credentials, UserGroupInformation} -import org.apache.spark.{Logging, SparkConf, SparkException} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils - -import scala.collection.JavaConversions._ -import scala.concurrent.duration._ -import scala.language.postfixOps +import org.apache.spark.{Logging, SparkConf, SparkException} /** * :: DeveloperApi :: @@ -199,13 +199,43 @@ class SparkHadoopUtil extends Logging { * that file. */ def listLeafStatuses(fs: FileSystem, basePath: Path): Seq[FileStatus] = { - def recurse(path: Path): Array[FileStatus] = { - val (directories, leaves) = fs.listStatus(path).partition(_.isDir) - leaves ++ directories.flatMap(f => listLeafStatuses(fs, f.getPath)) + listLeafStatuses(fs, fs.getFileStatus(basePath)) + } + + /** + * Get [[FileStatus]] objects for all leaf children (files) under the given base path. If the + * given path points to a file, return a single-element collection containing [[FileStatus]] of + * that file. + */ + def listLeafStatuses(fs: FileSystem, baseStatus: FileStatus): Seq[FileStatus] = { + def recurse(status: FileStatus): Seq[FileStatus] = { + val (directories, leaves) = fs.listStatus(status.getPath).partition(_.isDir) + leaves ++ directories.flatMap(f => listLeafStatuses(fs, f)) } - val baseStatus = fs.getFileStatus(basePath) - if (baseStatus.isDir) recurse(basePath) else Array(baseStatus) + if (baseStatus.isDir) recurse(baseStatus) else Seq(baseStatus) + } + + def listLeafDirStatuses(fs: FileSystem, basePath: Path): Seq[FileStatus] = { + listLeafDirStatuses(fs, fs.getFileStatus(basePath)) + } + + def listLeafDirStatuses(fs: FileSystem, baseStatus: FileStatus): Seq[FileStatus] = { + def recurse(status: FileStatus): Seq[FileStatus] = { + val (directories, files) = fs.listStatus(status.getPath).partition(_.isDir) + val leaves = if (directories.isEmpty) Seq(status) else Seq.empty[FileStatus] + leaves ++ directories.flatMap(dir => listLeafDirStatuses(fs, dir)) + } + + assert(baseStatus.isDir) + recurse(baseStatus) + } + + def globPath(pattern: Path): Seq[Path] = { + val fs = pattern.getFileSystem(conf) + Option(fs.globStatus(pattern)).map { statuses => + statuses.map(_.getPath.makeQualified(fs.getUri, fs.getWorkingDirectory)).toSeq + }.getOrElse(Seq.empty[Path]) } /** diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index ad3d8426bdedd..a47e29e2ef365 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -94,11 +94,23 @@ object MimaExcludes { // This `protected[sql]` method was removed in 1.3.1 ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.sql.SQLContext.checkAnalysis"), - // This `private[sql]` class was removed in 1.4.0: + // These `private[sql]` class were removed in 1.4.0: ProblemFilters.exclude[MissingClassProblem]( 
"org.apache.spark.sql.execution.AddExchange"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.execution.AddExchange$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.PartitionSpec"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.PartitionSpec$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.Partition"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.Partition$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues$"), // These test support classes were moved out of src/main and into src/test: ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.ParquetTestData"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 265a61592b943..f3107f7b51ad8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -27,23 +27,23 @@ import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal import com.fasterxml.jackson.core.JsonFactory - import org.apache.commons.lang3.StringUtils + import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.python.SerDeUtil import org.apache.spark.rdd.RDD -import org.apache.spark.storage.StorageLevel -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, SqlParser} -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, ResolvedStar} +import org.apache.spark.sql.catalyst.analysis.{ResolvedStar, UnresolvedAttribute, UnresolvedRelation} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.{JoinType, Inner} -import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.logical.{Filter, _} +import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} +import org.apache.spark.sql.catalyst.{expressions, CatalystTypeConverters, ScalaReflection, SqlParser} import org.apache.spark.sql.execution.{EvaluatePython, ExplainCommand, LogicalRDD} import org.apache.spark.sql.jdbc.JDBCWriteDetails import org.apache.spark.sql.json.JacksonGenerator +import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, ResolvedDataSource} import org.apache.spark.sql.types._ -import org.apache.spark.sql.sources.{ResolvedDataSource, CreateTableUsingAsSelect} +import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils @@ -400,7 +400,9 @@ class DataFrame private[sql]( joined.left, joined.right, joinType = Inner, - Some(EqualTo(joined.left.resolve(usingColumn), joined.right.resolve(usingColumn)))) + Some(expressions.EqualTo( + joined.left.resolve(usingColumn), + joined.right.resolve(usingColumn)))) ) } @@ -465,8 +467,8 @@ class DataFrame private[sql]( // By the time we get here, since we have already run analysis, all attributes should've been // resolved and become AttributeReference. 
val cond = plan.condition.map { _.transform { - case EqualTo(a: AttributeReference, b: AttributeReference) if a.sameRef(b) => - EqualTo(plan.left.resolve(a.name), plan.right.resolve(b.name)) + case expressions.EqualTo(a: AttributeReference, b: AttributeReference) if a.sameRef(b) => + expressions.EqualTo(plan.left.resolve(a.name), plan.right.resolve(b.name)) }} plan.copy(condition = cond) } @@ -1324,6 +1326,28 @@ class DataFrame private[sql]( saveAsTable(tableName, source, mode, options.toMap) } + /** + * :: Experimental :: + * Creates a table at the given path from the the contents of this DataFrame + * based on a given data source, [[SaveMode]] specified by mode, a set of options, and a list of + * partition columns. + * + * Note that this currently only works with DataFrames that are created from a HiveContext as + * there is no notion of a persisted catalog in a standard SQL context. Instead you can write + * an RDD out to a parquet file, and then register that file as a table. This "table" can then + * be the target of an `insertInto`. + * @group output + */ + @Experimental + def saveAsTable( + tableName: String, + source: String, + mode: SaveMode, + options: java.util.Map[String, String], + partitionColumns: java.util.List[String]): Unit = { + saveAsTable(tableName, source, mode, options.toMap, partitionColumns) + } + /** * :: Experimental :: * (Scala-specific) @@ -1350,6 +1374,7 @@ class DataFrame private[sql]( tableName, source, temporary = false, + Array.empty[String], mode, options, logicalPlan) @@ -1357,6 +1382,36 @@ class DataFrame private[sql]( sqlContext.executePlan(cmd).toRdd } + /** + * :: Experimental :: + * Creates a table at the given path from the the contents of this DataFrame + * based on a given data source, [[SaveMode]] specified by mode, a set of options, and a list of + * partition columns. + * + * Note that this currently only works with DataFrames that are created from a HiveContext as + * there is no notion of a persisted catalog in a standard SQL context. Instead you can write + * an RDD out to a parquet file, and then register that file as a table. This "table" can then + * be the target of an `insertInto`. + * @group output + */ + @Experimental + def saveAsTable( + tableName: String, + source: String, + mode: SaveMode, + options: Map[String, String], + partitionColumns: Seq[String]): Unit = { + sqlContext.executePlan( + CreateTableUsingAsSelect( + tableName, + source, + temporary = false, + partitionColumns.toArray, + mode, + options, + logicalPlan)).toRdd + } + /** * :: Experimental :: * Saves the contents of this DataFrame to the given path, @@ -1417,6 +1472,21 @@ class DataFrame private[sql]( save(source, mode, options.toMap) } + /** + * :: Experimental :: + * Saves the contents of this DataFrame to the given path based on the given data source, + * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. 
+ * @group output + */ + @Experimental + def save( + source: String, + mode: SaveMode, + options: java.util.Map[String, String], + partitionColumns: java.util.List[String]): Unit = { + save(source, mode, options.toMap, partitionColumns) + } + /** * :: Experimental :: * (Scala-specific) @@ -1429,7 +1499,22 @@ class DataFrame private[sql]( source: String, mode: SaveMode, options: Map[String, String]): Unit = { - ResolvedDataSource(sqlContext, source, mode, options, this) + ResolvedDataSource(sqlContext, source, Array.empty[String], mode, options, this) + } + + /** + * :: Experimental :: + * Saves the contents of this DataFrame to the given path based on the given data source, + * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. + * @group output + */ + @Experimental + def save( + source: String, + mode: SaveMode, + options: Map[String, String], + partitionColumns: Seq[String]): Unit = { + ResolvedDataSource(sqlContext, source, partitionColumns.toArray, mode, options, this) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index dcac97beafb04..f07bb196c11ec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -66,6 +66,9 @@ private[spark] object SQLConf { // to its length exceeds the threshold. val SCHEMA_STRING_LENGTH_THRESHOLD = "spark.sql.sources.schemaStringLengthThreshold" + // Whether to perform partition discovery when loading external data sources. Default to true. + val PARTITION_DISCOVERY_ENABLED = "spark.sql.sources.partitionDiscovery.enabled" + // Whether to perform eager analysis when constructing a dataframe. // Set to false when debugging requires the ability to look at invalid query plans. val DATAFRAME_EAGER_ANALYSIS = "spark.sql.eagerAnalysis" @@ -235,6 +238,9 @@ private[sql] class SQLConf extends Serializable with CatalystConf { private[spark] def defaultDataSourceName: String = getConf(DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.parquet") + private[spark] def partitionDiscoveryEnabled() = + getConf(SQLConf.PARTITION_DISCOVERY_ENABLED, "true").toBoolean + // Do not use a value larger than 4000 as the default value of this property. // See the comments of SCHEMA_STRING_LENGTH_THRESHOLD above for more information. private[spark] def schemaStringLengthThreshold: Int = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 648021806f3ee..afee09adaae20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -762,7 +762,7 @@ class SQLContext(@transient val sparkContext: SparkContext) */ @Experimental def load(source: String, options: Map[String, String]): DataFrame = { - val resolved = ResolvedDataSource(this, None, source, options) + val resolved = ResolvedDataSource(this, None, Array.empty[String], source, options) DataFrame(this, LogicalRelation(resolved.relation)) } @@ -781,6 +781,37 @@ class SQLContext(@transient val sparkContext: SparkContext) load(source, schema, options.toMap) } + /** + * :: Experimental :: + * (Java-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. 
+ * + * @group genericdata + */ + @Experimental + def load( + source: String, + schema: StructType, + partitionColumns: Array[String], + options: java.util.Map[String, String]): DataFrame = { + load(source, schema, partitionColumns, options.toMap) + } + + /** + * :: Experimental :: + * (Scala-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. + * @group genericdata + */ + @Experimental + def load( + source: String, + schema: StructType, + options: Map[String, String]): DataFrame = { + val resolved = ResolvedDataSource(this, Some(schema), Array.empty[String], source, options) + DataFrame(this, LogicalRelation(resolved.relation)) + } + /** * :: Experimental :: * (Scala-specific) Returns the dataset specified by the given data source and @@ -791,8 +822,9 @@ class SQLContext(@transient val sparkContext: SparkContext) def load( source: String, schema: StructType, + partitionColumns: Array[String], options: Map[String, String]): DataFrame = { - val resolved = ResolvedDataSource(this, Some(schema), source, options) + val resolved = ResolvedDataSource(this, Some(schema), partitionColumns, source, options) DataFrame(this, LogicalRelation(resolved.relation)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 56a4689eb58f0..af0029cb84f9a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -343,9 +343,13 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case c: CreateTableUsing if c.temporary && c.allowExisting => sys.error("allowExisting should be set to false when creating a temporary table.") - case CreateTableUsingAsSelect(tableName, provider, true, mode, opts, query) => - val cmd = - CreateTempTableUsingAsSelect(tableName, provider, mode, opts, query) + case CreateTableUsingAsSelect(tableName, provider, true, partitionsCols, mode, opts, query) + if partitionsCols.nonEmpty => + sys.error("Cannot create temporary partitioned table.") + + case CreateTableUsingAsSelect(tableName, provider, true, _, mode, opts, query) => + val cmd = CreateTempTableUsingAsSelect( + tableName, provider, Array.empty[String], mode, opts, query) ExecutedCommand(cmd) :: Nil case c: CreateTableUsingAsSelect if !c.temporary => sys.error("Tables created with SQLContext must be TEMPORARY. Use a HiveContext instead.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 85e60733bc57a..ee4b1c72a2148 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -136,10 +136,6 @@ private[sql] class DefaultSource } } -private[sql] case class Partition(values: Row, path: String) - -private[sql] case class PartitionSpec(partitionColumns: StructType, partitions: Seq[Partition]) - /** * An alternative to [[ParquetRelation]] that plugs in using the data sources API. This class is * intended as a full replacement of the Parquet support in Spark SQL. The old implementation will @@ -307,7 +303,7 @@ private[sql] case class ParquetRelation2( if (partitionDirs.nonEmpty) { // Parses names and values of partition columns, and infer their data types. 
- ParquetRelation2.parsePartitions(partitionDirs, defaultPartitionName) + PartitioningUtils.parsePartitions(partitionDirs, defaultPartitionName) } else { // No partition directories found, makes an empty specification PartitionSpec(StructType(Seq.empty[StructField]), Seq.empty[Partition]) @@ -805,7 +801,7 @@ private[sql] object ParquetRelation2 extends Logging { val ordinalMap = metastoreSchema.zipWithIndex.map { case (field, index) => field.name.toLowerCase -> index }.toMap - val reorderedParquetSchema = mergedParquetSchema.sortBy(f => + val reorderedParquetSchema = mergedParquetSchema.sortBy(f => ordinalMap.getOrElse(f.name.toLowerCase, metastoreSchema.size + 1)) StructType(metastoreSchema.zip(reorderedParquetSchema).map { @@ -841,178 +837,4 @@ private[sql] object ParquetRelation2 extends Logging { .filter(_.nullable) StructType(parquetSchema ++ missingFields) } - - - // TODO Data source implementations shouldn't touch Catalyst types (`Literal`). - // However, we are already using Catalyst expressions for partition pruning and predicate - // push-down here... - private[parquet] case class PartitionValues(columnNames: Seq[String], literals: Seq[Literal]) { - require(columnNames.size == literals.size) - } - - /** - * Given a group of qualified paths, tries to parse them and returns a partition specification. - * For example, given: - * {{{ - * hdfs://<host>:<port>/path/to/partition/a=1/b=hello/c=3.14 - * hdfs://<host>:<port>/path/to/partition/a=2/b=world/c=6.28 - * }}} - * it returns: - * {{{ - * PartitionSpec( - * partitionColumns = StructType( - * StructField(name = "a", dataType = IntegerType, nullable = true), - * StructField(name = "b", dataType = StringType, nullable = true), - * StructField(name = "c", dataType = DoubleType, nullable = true)), - * partitions = Seq( - * Partition( - * values = Row(1, "hello", 3.14), - * path = "hdfs://<host>:<port>/path/to/partition/a=1/b=hello/c=3.14"), - * Partition( - * values = Row(2, "world", 6.28), - * path = "hdfs://<host>:<port>/path/to/partition/a=2/b=world/c=6.28"))) - * }}} - */ - private[parquet] def parsePartitions( - paths: Seq[Path], - defaultPartitionName: String): PartitionSpec = { - val partitionValues = resolvePartitions(paths.map(parsePartition(_, defaultPartitionName))) - val fields = { - val (PartitionValues(columnNames, literals)) = partitionValues.head - columnNames.zip(literals).map { case (name, Literal(_, dataType)) => - StructField(name, dataType, nullable = true) - } - } - - val partitions = partitionValues.zip(paths).map { - case (PartitionValues(_, literals), path) => - Partition(Row(literals.map(_.value): _*), path.toString) - } - - PartitionSpec(StructType(fields), partitions) - } - - /** - * Parses a single partition, returns column names and values of each partition column. 
For - * example, given: - * {{{ - * path = hdfs://<host>:<port>/path/to/partition/a=42/b=hello/c=3.14 - * }}} - * it returns: - * {{{ - * PartitionValues( - * Seq("a", "b", "c"), - * Seq( - * Literal.create(42, IntegerType), - * Literal.create("hello", StringType), - * Literal.create(3.14, FloatType))) - * }}} - */ - private[parquet] def parsePartition( - path: Path, - defaultPartitionName: String): PartitionValues = { - val columns = ArrayBuffer.empty[(String, Literal)] - // Old Hadoop versions don't have `Path.isRoot` - var finished = path.getParent == null - var chopped = path - - while (!finished) { - val maybeColumn = parsePartitionColumn(chopped.getName, defaultPartitionName) - maybeColumn.foreach(columns += _) - chopped = chopped.getParent - finished = maybeColumn.isEmpty || chopped.getParent == null - } - - val (columnNames, values) = columns.reverse.unzip - PartitionValues(columnNames, values) - } - - private def parsePartitionColumn( - columnSpec: String, - defaultPartitionName: String): Option[(String, Literal)] = { - val equalSignIndex = columnSpec.indexOf('=') - if (equalSignIndex == -1) { - None - } else { - val columnName = columnSpec.take(equalSignIndex) - assert(columnName.nonEmpty, s"Empty partition column name in '$columnSpec'") - - val rawColumnValue = columnSpec.drop(equalSignIndex + 1) - assert(rawColumnValue.nonEmpty, s"Empty partition column value in '$columnSpec'") - - val literal = inferPartitionColumnValue(rawColumnValue, defaultPartitionName) - Some(columnName -> literal) - } - } - - /** - * Resolves possible type conflicts between partitions by up-casting "lower" types. The up- - * casting order is: - * {{{ - * NullType -> - * IntegerType -> LongType -> - * FloatType -> DoubleType -> DecimalType.Unlimited -> - * StringType - * }}} - */ - private[parquet] def resolvePartitions(values: Seq[PartitionValues]): Seq[PartitionValues] = { - // Column names of all partitions must match - val distinctPartitionsColNames = values.map(_.columnNames).distinct - assert(distinctPartitionsColNames.size == 1, { - val list = distinctPartitionsColNames.mkString("\t", "\n", "") - s"Conflicting partition column names detected:\n$list" - }) - - // Resolves possible type conflicts for each column - val columnCount = values.head.columnNames.size - val resolvedValues = (0 until columnCount).map { i => - resolveTypeConflicts(values.map(_.literals(i))) - } - - // Fills resolved literals back to each partition - values.zipWithIndex.map { case (d, index) => - d.copy(literals = resolvedValues.map(_(index))) - } - } - - /** - * Converts a string to a `Literal` with automatic type inference. Currently only supports - * [[IntegerType]], [[LongType]], [[FloatType]], [[DoubleType]], [[DecimalType.Unlimited]], and - * [[StringType]]. 
- */ - private[parquet] def inferPartitionColumnValue( - raw: String, - defaultPartitionName: String): Literal = { - // First tries integral types - Try(Literal.create(Integer.parseInt(raw), IntegerType)) - .orElse(Try(Literal.create(JLong.parseLong(raw), LongType))) - // Then falls back to fractional types - .orElse(Try(Literal.create(JFloat.parseFloat(raw), FloatType))) - .orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType))) - .orElse(Try(Literal.create(new JBigDecimal(raw), DecimalType.Unlimited))) - // Then falls back to string - .getOrElse { - if (raw == defaultPartitionName) Literal.create(null, NullType) - else Literal.create(raw, StringType) - } - } - - private val upCastingOrder: Seq[DataType] = - Seq(NullType, IntegerType, LongType, FloatType, DoubleType, DecimalType.Unlimited, StringType) - - /** - * Given a collection of [[Literal]]s, resolves possible type conflicts by up-casting "lower" - * types. - */ - private def resolveTypeConflicts(literals: Seq[Literal]): Seq[Literal] = { - val desiredType = { - val topType = literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_)) - // Falls back to string if all values of this column are null or empty string - if (topType == NullType) StringType else topType - } - - literals.map { case l @ Literal(_, dataType) => - Literal.create(Cast(l, desiredType).eval(), desiredType) - } - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala index b3d71f687a60a..a5410cda0fe6b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala @@ -17,20 +17,25 @@ package org.apache.spark.sql.sources +import org.apache.hadoop.fs.Path + +import org.apache.spark.Logging +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.types.{UTF8String, StringType} -import org.apache.spark.sql.{Row, Strategy, execution, sources} +import org.apache.spark.sql.types.{StructType, UTF8String, StringType} +import org.apache.spark.sql._ /** * A Strategy for planning scans over data sources defined using the sources API. 
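 * As of SPARK-3928 this strategy also plans scans over partitioned [[FSBasedRelation]]s:
 * predicates that reference only partition columns are evaluated against the partition spec
 * to prune partition directories before any data files are read.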
*/ -private[sql] object DataSourceStrategy extends Strategy { +private[sql] object DataSourceStrategy extends Strategy with Logging { def apply(plan: LogicalPlan): Seq[execution.SparkPlan] = plan match { case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: CatalystScan)) => pruneFilterProjectRaw( @@ -53,6 +58,51 @@ private[sql] object DataSourceStrategy extends Strategy { filters, (a, _) => t.buildScan(a)) :: Nil + // Scanning partitioned FSBasedRelation + case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: FSBasedRelation)) + if t.partitionSpec.partitionColumns.nonEmpty => + val selectedPartitions = prunePartitions(filters, t.partitionSpec).toArray + + logInfo { + val total = t.partitionSpec.partitions.length + val selected = selectedPartitions.length + val percentPruned = (1 - total.toDouble / selected.toDouble) * 100 + s"Selected $selected partitions out of $total, pruned $percentPruned% partitions." + } + + // Only pushes down predicates that do not reference partition columns. + val pushedFilters = { + val partitionColumnNames = t.partitionSpec.partitionColumns.map(_.name).toSet + filters.filter { f => + val referencedColumnNames = f.references.map(_.name).toSet + referencedColumnNames.intersect(partitionColumnNames).isEmpty + } + } + + buildPartitionedTableScan( + l, + projectList, + pushedFilters, + t.partitionSpec.partitionColumns, + selectedPartitions) :: Nil + + // Scanning non-partitioned FSBasedRelation + case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: FSBasedRelation)) => + val inputPaths = t.paths.map(new Path(_)).flatMap { path => + val fs = path.getFileSystem(t.sqlContext.sparkContext.hadoopConfiguration) + val qualifiedPath = path.makeQualified(fs.getUri, fs.getWorkingDirectory) + SparkHadoopUtil.get.listLeafStatuses(fs, qualifiedPath).map(_.getPath).filterNot { path => + val name = path.getName + name.startsWith("_") || name.startsWith(".") + }.map(fs.makeQualified(_).toString) + } + + pruneFilterProject( + l, + projectList, + filters, + (a, f) => t.buildScan(a, f, inputPaths)) :: Nil + case l @ LogicalRelation(t: TableScan) => createPhysicalRDD(l.relation, l.output, t.buildScan()) :: Nil @@ -60,9 +110,144 @@ private[sql] object DataSourceStrategy extends Strategy { l @ LogicalRelation(t: InsertableRelation), part, query, overwrite, false) if part.isEmpty => execution.ExecutedCommand(InsertIntoDataSource(l, query, overwrite)) :: Nil + case i @ logical.InsertIntoTable( + l @ LogicalRelation(t: FSBasedRelation), part, query, overwrite, false) if part.isEmpty => + val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append + execution.ExecutedCommand( + InsertIntoFSBasedRelation(t, query, Array.empty[String], mode)) :: Nil + case _ => Nil } + private def buildPartitionedTableScan( + logicalRelation: LogicalRelation, + projections: Seq[NamedExpression], + filters: Seq[Expression], + partitionColumns: StructType, + partitions: Array[Partition]) = { + val output = projections.map(_.toAttribute) + val relation = logicalRelation.relation.asInstanceOf[FSBasedRelation] + + // Builds RDD[Row]s for each selected partition. 
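        // One scan is planned per selected partition directory. Requested partition columns are
        // not read from the data files; their values are merged back into each row from the
        // directory path (see mergeWithPartitionValues below), and the per-partition RDDs are
        // unioned into the final physical RDD.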
+ val perPartitionRows = partitions.map { case Partition(partitionValues, dir) => + // Paths to all data files within this partition + val dataFilePaths = { + val dirPath = new Path(dir) + val fs = dirPath.getFileSystem(SparkHadoopUtil.get.conf) + fs.listStatus(dirPath).map(_.getPath).filterNot { path => + val name = path.getName + name.startsWith("_") || name.startsWith(".") + }.map(fs.makeQualified(_).toString) + } + + // The table scan operator (PhysicalRDD) which retrieves required columns from data files. + // Notice that the schema of data files, represented by `relation.dataSchema`, may contain + // some partition column(s). + val scan = + pruneFilterProject( + logicalRelation, + projections, + filters, + (requiredColumns, filters) => { + val partitionColNames = partitionColumns.fieldNames + + // Don't scan any partition columns to save I/O. Here we are being optimistic and + // assuming partition columns data stored in data files are always consistent with those + // partition values encoded in partition directory paths. + val nonPartitionColumns = requiredColumns.filterNot(partitionColNames.contains) + val dataRows = relation.buildScan(nonPartitionColumns, filters, dataFilePaths) + + // Merges data values with partition values. + mergeWithPartitionValues( + relation.schema, + requiredColumns, + partitionColNames, + partitionValues, + dataRows) + }) + + scan.execute() + } + + val unionedRows = perPartitionRows.reduceOption(_ ++ _).getOrElse { + relation.sqlContext.emptyResult + } + + createPhysicalRDD(logicalRelation.relation, output, unionedRows) + } + + private def mergeWithPartitionValues( + schema: StructType, + requiredColumns: Array[String], + partitionColumns: Array[String], + partitionValues: Row, + dataRows: RDD[Row]): RDD[Row] = { + val nonPartitionColumns = requiredColumns.filterNot(partitionColumns.contains) + + // If output columns contain any partition column(s), we need to merge scanned data + // columns and requested partition columns to form the final result. + if (!requiredColumns.sameElements(nonPartitionColumns)) { + val mergers = requiredColumns.zipWithIndex.map { case (name, index) => + // To see whether the `index`-th column is a partition column... + val i = partitionColumns.indexOf(name) + if (i != -1) { + // If yes, gets column value from partition values. + (mutableRow: MutableRow, dataRow: expressions.Row, ordinal: Int) => { + mutableRow(ordinal) = partitionValues(i) + } + } else { + // Otherwise, inherits the value from scanned data. 
+ val i = nonPartitionColumns.indexOf(name) + (mutableRow: MutableRow, dataRow: expressions.Row, ordinal: Int) => { + mutableRow(ordinal) = dataRow(i) + } + } + } + + dataRows.mapPartitions { iterator => + val dataTypes = requiredColumns.map(schema(_).dataType) + val mutableRow = new SpecificMutableRow(dataTypes) + iterator.map { dataRow => + var i = 0 + while (i < mutableRow.length) { + mergers(i)(mutableRow, dataRow, i) + i += 1 + } + mutableRow.asInstanceOf[expressions.Row] + } + } + } else { + dataRows + } + } + + protected def prunePartitions( + predicates: Seq[Expression], + partitionSpec: PartitionSpec): Seq[Partition] = { + val PartitionSpec(partitionColumns, partitions) = partitionSpec + val partitionColumnNames = partitionColumns.map(_.name).toSet + val partitionPruningPredicates = predicates.filter { + _.references.map(_.name).toSet.subsetOf(partitionColumnNames) + } + + if (partitionPruningPredicates.nonEmpty) { + val predicate = + partitionPruningPredicates + .reduceOption(expressions.And) + .getOrElse(Literal(true)) + + val boundPredicate = InterpretedPredicate.create(predicate.transform { + case a: AttributeReference => + val index = partitionColumns.indexWhere(a.name == _.name) + BoundReference(index, partitionColumns(index).dataType, nullable = true) + }) + + partitions.filter { case Partition(values, _) => boundPredicate(values) } + } else { + partitions + } + } + // Based on Public API. protected def pruneFilterProject( relation: LogicalRelation, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala new file mode 100644 index 0000000000000..d30f7f65e21c0 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.sources + +import java.lang.{Double => JDouble, Float => JFloat, Long => JLong} +import java.math.{BigDecimal => JBigDecimal} + +import scala.collection.mutable.ArrayBuffer +import scala.util.Try + +import com.google.common.cache.{CacheBuilder, Cache} +import org.apache.hadoop.fs.{FileStatus, Path} + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} +import org.apache.spark.sql.types._ + +private[sql] case class Partition(values: Row, path: String) + +private[sql] case class PartitionSpec(partitionColumns: StructType, partitions: Seq[Partition]) + +private[sql] object PartitioningUtils { + private[sql] case class PartitionValues(columnNames: Seq[String], literals: Seq[Literal]) { + require(columnNames.size == literals.size) + } + + /** + * Given a group of qualified paths, tries to parse them and returns a partition specification. + * For example, given: + * {{{ + * hdfs://<host>:<port>/path/to/partition/a=1/b=hello/c=3.14 + * hdfs://<host>:<port>/path/to/partition/a=2/b=world/c=6.28 + * }}} + * it returns: + * {{{ + * PartitionSpec( + * partitionColumns = StructType( + * StructField(name = "a", dataType = IntegerType, nullable = true), + * StructField(name = "b", dataType = StringType, nullable = true), + * StructField(name = "c", dataType = DoubleType, nullable = true)), + * partitions = Seq( + * Partition( + * values = Row(1, "hello", 3.14), + * path = "hdfs://<host>:<port>/path/to/partition/a=1/b=hello/c=3.14"), + * Partition( + * values = Row(2, "world", 6.28), + * path = "hdfs://<host>:<port>/path/to/partition/a=2/b=world/c=6.28"))) + * }}} + */ + private[sql] def parsePartitions( + paths: Seq[Path], + defaultPartitionName: String): PartitionSpec = { + val partitionValues = resolvePartitions(paths.map(parsePartition(_, defaultPartitionName))) + val fields = { + val (PartitionValues(columnNames, literals)) = partitionValues.head + columnNames.zip(literals).map { case (name, Literal(_, dataType)) => + StructField(name, dataType, nullable = true) + } + } + + val partitions = partitionValues.zip(paths).map { + case (PartitionValues(_, literals), path) => + Partition(Row(literals.map(_.value): _*), path.toString) + } + + PartitionSpec(StructType(fields), partitions) + } + + /** + * Parses a single partition, returns column names and values of each partition column. 
For + * example, given: + * {{{ + * path = hdfs://<host>:<port>/path/to/partition/a=42/b=hello/c=3.14 + * }}} + * it returns: + * {{{ + * PartitionValues( + * Seq("a", "b", "c"), + * Seq( + * Literal.create(42, IntegerType), + * Literal.create("hello", StringType), + * Literal.create(3.14, FloatType))) + * }}} + */ + private[sql] def parsePartition( + path: Path, + defaultPartitionName: String): PartitionValues = { + val columns = ArrayBuffer.empty[(String, Literal)] + // Old Hadoop versions don't have `Path.isRoot` + var finished = path.getParent == null + var chopped = path + + while (!finished) { + val maybeColumn = parsePartitionColumn(chopped.getName, defaultPartitionName) + maybeColumn.foreach(columns += _) + chopped = chopped.getParent + finished = maybeColumn.isEmpty || chopped.getParent == null + } + + val (columnNames, values) = columns.reverse.unzip + PartitionValues(columnNames, values) + } + + private def parsePartitionColumn( + columnSpec: String, + defaultPartitionName: String): Option[(String, Literal)] = { + val equalSignIndex = columnSpec.indexOf('=') + if (equalSignIndex == -1) { + None + } else { + val columnName = columnSpec.take(equalSignIndex) + assert(columnName.nonEmpty, s"Empty partition column name in '$columnSpec'") + + val rawColumnValue = columnSpec.drop(equalSignIndex + 1) + assert(rawColumnValue.nonEmpty, s"Empty partition column value in '$columnSpec'") + + val literal = inferPartitionColumnValue(rawColumnValue, defaultPartitionName) + Some(columnName -> literal) + } + } + + /** + * Resolves possible type conflicts between partitions by up-casting "lower" types. The up- + * casting order is: + * {{{ + * NullType -> + * IntegerType -> LongType -> + * FloatType -> DoubleType -> DecimalType.Unlimited -> + * StringType + * }}} + */ + private[sql] def resolvePartitions(values: Seq[PartitionValues]): Seq[PartitionValues] = { + // Column names of all partitions must match + val distinctPartitionsColNames = values.map(_.columnNames).distinct + assert(distinctPartitionsColNames.size == 1, { + val list = distinctPartitionsColNames.mkString("\t", "\n", "") + s"Conflicting partition column names detected:\n$list" + }) + + // Resolves possible type conflicts for each column + val columnCount = values.head.columnNames.size + val resolvedValues = (0 until columnCount).map { i => + resolveTypeConflicts(values.map(_.literals(i))) + } + + // Fills resolved literals back to each partition + values.zipWithIndex.map { case (d, index) => + d.copy(literals = resolvedValues.map(_(index))) + } + } + + /** + * Converts a string to a `Literal` with automatic type inference. Currently only supports + * [[IntegerType]], [[LongType]], [[FloatType]], [[DoubleType]], [[DecimalType.Unlimited]], and + * [[StringType]]. 
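+   *
+   * For example (illustrative), with `defaultPartitionName = "__HIVE_DEFAULT_PARTITION__"`:
+   * {{{
+   *   inferPartitionColumnValue("42", defaultPartitionName)    // Literal.create(42, IntegerType)
+   *   inferPartitionColumnValue("3.14", defaultPartitionName)  // Literal.create(3.14f, FloatType)
+   *   inferPartitionColumnValue("hello", defaultPartitionName) // Literal.create("hello", StringType)
+   *   inferPartitionColumnValue(defaultPartitionName, defaultPartitionName)
+   *                                                            // Literal.create(null, NullType)
+   * }}}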
+ */ + private[sql] def inferPartitionColumnValue( + raw: String, + defaultPartitionName: String): Literal = { + // First tries integral types + Try(Literal.create(Integer.parseInt(raw), IntegerType)) + .orElse(Try(Literal.create(JLong.parseLong(raw), LongType))) + // Then falls back to fractional types + .orElse(Try(Literal.create(JFloat.parseFloat(raw), FloatType))) + .orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType))) + .orElse(Try(Literal.create(new JBigDecimal(raw), DecimalType.Unlimited))) + // Then falls back to string + .getOrElse { + if (raw == defaultPartitionName) Literal.create(null, NullType) + else Literal.create(raw, StringType) + } + } + + private val upCastingOrder: Seq[DataType] = + Seq(NullType, IntegerType, LongType, FloatType, DoubleType, DecimalType.Unlimited, StringType) + + /** + * Given a collection of [[Literal]]s, resolves possible type conflicts by up-casting "lower" + * types. + */ + private def resolveTypeConflicts(literals: Seq[Literal]): Seq[Literal] = { + val desiredType = { + val topType = literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_)) + // Falls back to string if all values of this column are null or empty string + if (topType == NullType) StringType else topType + } + + literals.map { case l @ Literal(_, dataType) => + Literal.create(Cast(l, desiredType).eval(), desiredType) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index dbdb0d39c26a1..127133bfaf662 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -14,12 +14,28 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package org.apache.spark.sql.sources -import org.apache.spark.sql.{DataFrame, SQLContext} +import java.util.Date + +import scala.collection.mutable + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter, FileOutputFormat} +import org.apache.hadoop.util.Shell +import parquet.hadoop.util.ContextUtil + +import org.apache.spark._ +import org.apache.spark.mapred.SparkHadoopMapRedUtil +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil +import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateProjection import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.RunnableCommand +import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} private[sql] case class InsertIntoDataSource( logicalRelation: LogicalRelation, @@ -41,3 +57,391 @@ private[sql] case class InsertIntoDataSource( Seq.empty[Row] } } + +private[sql] case class InsertIntoFSBasedRelation( + @transient relation: FSBasedRelation, + @transient query: LogicalPlan, + partitionColumns: Array[String], + mode: SaveMode) + extends RunnableCommand { + + override def run(sqlContext: SQLContext): Seq[Row] = { + require( + relation.paths.length == 1, + s"Cannot write to multiple destinations: ${relation.paths.mkString(",")}") + + val hadoopConf = sqlContext.sparkContext.hadoopConfiguration + val outputPath = new Path(relation.paths.head) + val fs = outputPath.getFileSystem(hadoopConf) + val qualifiedOutputPath = outputPath.makeQualified(fs.getUri, fs.getWorkingDirectory) + + val doInsertion = (mode, fs.exists(qualifiedOutputPath)) match { + case (SaveMode.ErrorIfExists, true) => + sys.error(s"path $qualifiedOutputPath already exists.") + case (SaveMode.Overwrite, true) => + fs.delete(qualifiedOutputPath, true) + true + case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => + true + case (SaveMode.Ignore, exists) => + !exists + } + + if (doInsertion) { + val job = Job.getInstance(hadoopConf) + job.setOutputKeyClass(classOf[Void]) + job.setOutputValueClass(classOf[Row]) + FileOutputFormat.setOutputPath(job, qualifiedOutputPath) + + val df = sqlContext.createDataFrame( + DataFrame(sqlContext, query).queryExecution.toRdd, + relation.schema, + needsConversion = false) + + if (partitionColumns.isEmpty) { + insert(new DefaultWriterContainer(relation, job), df) + } else { + val writerContainer = new DynamicPartitionWriterContainer( + relation, job, partitionColumns, "__HIVE_DEFAULT_PARTITION__") + insertWithDynamicPartitions(sqlContext, writerContainer, df, partitionColumns) + } + } + + Seq.empty[Row] + } + + private def insert(writerContainer: BaseWriterContainer, df: DataFrame): Unit = { + // Uses local vals for serialization + val needsConversion = relation.needConversion + val dataSchema = relation.dataSchema + + try { + writerContainer.driverSideSetup() + df.sqlContext.sparkContext.runJob(df.queryExecution.executedPlan.execute(), writeRows _) + writerContainer.commitJob() + relation.refresh() + } catch { case cause: Throwable => + writerContainer.abortJob() + throw new SparkException("Job aborted.", cause) + } + + def writeRows(taskContext: TaskContext, iterator: Iterator[Row]): Unit = { + writerContainer.executorSideSetup(taskContext) + + try { + if (needsConversion) { + val converter = CatalystTypeConverters.createToScalaConverter(dataSchema) + while 
(iterator.hasNext) { + val row = converter(iterator.next()).asInstanceOf[Row] + writerContainer.outputWriterForRow(row).write(row) + } + } else { + while (iterator.hasNext) { + val row = iterator.next() + writerContainer.outputWriterForRow(row).write(row) + } + } + writerContainer.commitTask() + } catch { case cause: Throwable => + writerContainer.abortTask() + throw new SparkException("Task failed while writing rows.", cause) + } + } + } + + private def insertWithDynamicPartitions( + sqlContext: SQLContext, + writerContainer: BaseWriterContainer, + df: DataFrame, + partitionColumns: Array[String]): Unit = { + // Uses a local val for serialization + val needsConversion = relation.needConversion + val dataSchema = relation.dataSchema + + require( + df.schema == relation.schema, + s"""DataFrame must have the same schema as the relation to which is inserted. + |DataFrame schema: ${df.schema} + |Relation schema: ${relation.schema} + """.stripMargin) + + val partitionColumnsInSpec = relation.partitionColumns.fieldNames + require( + partitionColumnsInSpec.sameElements(partitionColumns), + s"""Partition columns mismatch. + |Expected: ${partitionColumnsInSpec.mkString(", ")} + |Actual: ${partitionColumns.mkString(", ")} + """.stripMargin) + + val output = df.queryExecution.executedPlan.output + val (partitionOutput, dataOutput) = output.partition(a => partitionColumns.contains(a.name)) + val codegenEnabled = df.sqlContext.conf.codegenEnabled + + try { + writerContainer.driverSideSetup() + df.sqlContext.sparkContext.runJob(df.queryExecution.executedPlan.execute(), writeRows _) + writerContainer.commitJob() + relation.refresh() + } catch { case cause: Throwable => + logError("Aborting job.", cause) + writerContainer.abortJob() + throw new SparkException("Job aborted.", cause) + } + + def writeRows(taskContext: TaskContext, iterator: Iterator[Row]): Unit = { + writerContainer.executorSideSetup(taskContext) + + val partitionProj = newProjection(codegenEnabled, partitionOutput, output) + val dataProj = newProjection(codegenEnabled, dataOutput, output) + + if (needsConversion) { + val converter = CatalystTypeConverters.createToScalaConverter(dataSchema) + while (iterator.hasNext) { + val row = iterator.next() + val partitionPart = partitionProj(row) + val dataPart = dataProj(row) + val convertedDataPart = converter(dataPart).asInstanceOf[Row] + writerContainer.outputWriterForRow(partitionPart).write(convertedDataPart) + } + } else { + while (iterator.hasNext) { + val row = iterator.next() + val partitionPart = partitionProj(row) + val dataPart = dataProj(row) + writerContainer.outputWriterForRow(partitionPart).write(dataPart) + } + } + + writerContainer.commitTask() + } + } + + // This is copied from SparkPlan, probably should move this to a more general place. + private def newProjection( + codegenEnabled: Boolean, + expressions: Seq[Expression], + inputSchema: Seq[Attribute]): Projection = { + log.debug( + s"Creating Projection: $expressions, inputSchema: $inputSchema, codegen:$codegenEnabled") + if (codegenEnabled) { + GenerateProjection.generate(expressions, inputSchema) + } else { + new InterpretedProjection(expressions, inputSchema) + } + } +} + +private[sql] abstract class BaseWriterContainer( + @transient val relation: FSBasedRelation, + @transient job: Job) + extends SparkHadoopMapReduceUtil + with Logging + with Serializable { + + protected val serializableConf = new SerializableWritable(ContextUtil.getConfiguration(job)) + + // This is only used on driver side. 
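+  // (The field is marked @transient so it is never shipped to executors; only driverSideSetup(),
+  // commitJob() and abortJob() below reference it.)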
+ @transient private val jobContext: JobContext = job + + // The following fields are initialized and used on both driver and executor side. + @transient protected var outputCommitter: FileOutputCommitter = _ + @transient private var jobId: JobID = _ + @transient private var taskId: TaskID = _ + @transient private var taskAttemptId: TaskAttemptID = _ + @transient protected var taskAttemptContext: TaskAttemptContext = _ + + protected val outputPath: String = { + assert( + relation.paths.length == 1, + s"Cannot write to multiple destinations: ${relation.paths.mkString(",")}") + relation.paths.head + } + + protected val dataSchema = relation.dataSchema + + protected val outputWriterClass: Class[_ <: OutputWriter] = relation.outputWriterClass + + private var outputFormatClass: Class[_ <: OutputFormat[_, _]] = _ + + def driverSideSetup(): Unit = { + setupIDs(0, 0, 0) + setupConf() + taskAttemptContext = newTaskAttemptContext(serializableConf.value, taskAttemptId) + relation.prepareForWrite(job) + outputFormatClass = job.getOutputFormatClass + outputCommitter = newOutputCommitter(taskAttemptContext) + outputCommitter.setupJob(jobContext) + } + + def executorSideSetup(taskContext: TaskContext): Unit = { + setupIDs(taskContext.stageId(), taskContext.partitionId(), taskContext.attemptNumber()) + setupConf() + taskAttemptContext = newTaskAttemptContext(serializableConf.value, taskAttemptId) + outputCommitter = newOutputCommitter(taskAttemptContext) + outputCommitter.setupTask(taskAttemptContext) + initWriters() + } + + private def newOutputCommitter(context: TaskAttemptContext): FileOutputCommitter = { + outputFormatClass.newInstance().getOutputCommitter(context) match { + case f: FileOutputCommitter => f + case f => sys.error( + s"FileOutputCommitter or its subclass is expected, but got a ${f.getClass.getName}.") + } + } + + private def setupIDs(jobId: Int, splitId: Int, attemptId: Int): Unit = { + this.jobId = SparkHadoopWriter.createJobID(new Date, jobId) + this.taskId = new TaskID(this.jobId, true, splitId) + this.taskAttemptId = new TaskAttemptID(taskId, attemptId) + } + + private def setupConf(): Unit = { + serializableConf.value.set("mapred.job.id", jobId.toString) + serializableConf.value.set("mapred.tip.id", taskAttemptId.getTaskID.toString) + serializableConf.value.set("mapred.task.id", taskAttemptId.toString) + serializableConf.value.setBoolean("mapred.task.is.map", true) + serializableConf.value.setInt("mapred.task.partition", 0) + } + + // Called on executor side when writing rows + def outputWriterForRow(row: Row): OutputWriter + + protected def initWriters(): Unit + + def commitTask(): Unit = { + SparkHadoopMapRedUtil.commitTask( + outputCommitter, taskAttemptContext, jobId.getId, taskId.getId, taskAttemptId.getId) + } + + def abortTask(): Unit = { + outputCommitter.abortTask(taskAttemptContext) + logError(s"Task attempt $taskAttemptId aborted.") + } + + def commitJob(): Unit = { + outputCommitter.commitJob(jobContext) + logInfo(s"Job $jobId committed.") + } + + def abortJob(): Unit = { + outputCommitter.abortJob(jobContext, JobStatus.State.FAILED) + logError(s"Job $jobId aborted.") + } +} + +private[sql] class DefaultWriterContainer( + @transient relation: FSBasedRelation, + @transient job: Job) + extends BaseWriterContainer(relation, job) { + + @transient private var writer: OutputWriter = _ + + override protected def initWriters(): Unit = { + writer = outputWriterClass.newInstance() + writer.init(outputCommitter.getWorkPath.toString, dataSchema, taskAttemptContext) + } + + override 
def outputWriterForRow(row: Row): OutputWriter = writer + + override def commitTask(): Unit = { + writer.close() + super.commitTask() + } + + override def abortTask(): Unit = { + writer.close() + super.abortTask() + } +} + +private[sql] class DynamicPartitionWriterContainer( + @transient relation: FSBasedRelation, + @transient job: Job, + partitionColumns: Array[String], + defaultPartitionName: String) + extends BaseWriterContainer(relation, job) { + + // All output writers are created on executor side. + @transient protected var outputWriters: mutable.Map[String, OutputWriter] = _ + + override protected def initWriters(): Unit = { + outputWriters = mutable.Map.empty[String, OutputWriter] + } + + override def outputWriterForRow(row: Row): OutputWriter = { + val partitionPath = partitionColumns.zip(row.toSeq).map { case (col, rawValue) => + val string = if (rawValue == null) null else String.valueOf(rawValue) + val valueString = if (string == null || string.isEmpty) { + defaultPartitionName + } else { + DynamicPartitionWriterContainer.escapePathName(string) + } + s"/$col=$valueString" + }.mkString + + outputWriters.getOrElseUpdate(partitionPath, { + val path = new Path(outputCommitter.getWorkPath, partitionPath.stripPrefix(Path.SEPARATOR)) + val writer = outputWriterClass.newInstance() + writer.init(path.toString, dataSchema, taskAttemptContext) + writer + }) + } + + override def commitTask(): Unit = { + outputWriters.values.foreach(_.close()) + super.commitTask() + } + + override def abortTask(): Unit = { + outputWriters.values.foreach(_.close()) + super.abortTask() + } +} + +private[sql] object DynamicPartitionWriterContainer { + ////////////////////////////////////////////////////////////////////////////////////////////////// + // The following string escaping code is mainly copied from Hive (o.a.h.h.common.FileUtils). + ////////////////////////////////////////////////////////////////////////////////////////////////// + + val charToEscape = { + val bitSet = new java.util.BitSet(128) + + /** + * ASCII 01-1F are HTTP control characters that need to be escaped. + * \u000A and \u000D are \n and \r, respectively. 
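+     * For example (illustrative), '/' (0x2f) and '=' (0x3d) are in the list below, so
+     * escapePathName("2015/05=07") yields "2015%2f05%3d07".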
+ */ + val clist = Array( + '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', '\u0009', + '\n', '\u000B', '\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', + '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', + '\u001D', '\u001E', '\u001F', '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', + '{', '[', ']', '^') + + clist.foreach(bitSet.set(_)) + + if (Shell.WINDOWS) { + Array(' ', '<', '>', '|').foreach(bitSet.set(_)) + } + + bitSet + } + + def needsEscaping(c: Char): Boolean = { + c >= 0 && c < charToEscape.size() && charToEscape.get(c) + } + + def escapePathName(path: String): String = { + val builder = new StringBuilder() + path.foreach { c => + if (DynamicPartitionWriterContainer.needsEscaping(c)) { + builder.append('%') + builder.append(f"${c.asInstanceOf[Int]}%02x") + } else { + builder.append(c) + } + } + + builder.toString() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index 06c64f2bdd59e..595c5eb40e295 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -17,18 +17,20 @@ package org.apache.spark.sql.sources -import scala.language.existentials +import scala.language.{existentials, implicitConversions} import scala.util.matching.Regex -import scala.language.implicitConversions + +import org.apache.hadoop.fs.Path import org.apache.spark.Logging -import org.apache.spark.sql.{AnalysisException, SaveMode, DataFrame, SQLContext} -import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.catalyst.AbstractSparkSQLParser import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Row} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, DataFrame, SQLContext, SaveMode} import org.apache.spark.util.Utils /** @@ -111,6 +113,7 @@ private[sql] class DDLParser( CreateTableUsingAsSelect(tableName, provider, temp.isDefined, + Array.empty[String], mode, options, queryPlan) @@ -157,7 +160,7 @@ private[sql] class DDLParser( protected lazy val className: Parser[String] = repsep(ident, ".") ^^ { case s => s.mkString(".")} override implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch( - s"identifier matching regex ${regex}", { + s"identifier matching regex $regex", { case lexical.Identifier(str) if regex.unapplySeq(str).isDefined => str case lexical.Keyword(str) if regex.unapplySeq(str).isDefined => str } @@ -214,6 +217,7 @@ private[sql] object ResolvedDataSource { def apply( sqlContext: SQLContext, userSpecifiedSchema: Option[StructType], + partitionColumns: Array[String], provider: String, options: Map[String, String]): ResolvedDataSource = { val clazz: Class[_] = lookupDataSource(provider) @@ -222,6 +226,27 @@ private[sql] object ResolvedDataSource { case Some(schema: StructType) => clazz.newInstance() match { case dataSource: SchemaRelationProvider => dataSource.createRelation(sqlContext, new CaseInsensitiveMap(options), schema) + case dataSource: FSBasedRelationProvider => + val maybePartitionsSchema = if (partitionColumns.isEmpty) { + None + } else { + Some(partitionColumnsSchema(schema, 
partitionColumns)) + } + + val caseInsensitiveOptions= new CaseInsensitiveMap(options) + val paths = { + val patternPath = new Path(caseInsensitiveOptions("path")) + SparkHadoopUtil.get.globPath(patternPath).map(_.toString).toArray + } + + val dataSchema = StructType(schema.filterNot(f => partitionColumns.contains(f.name))) + + dataSource.createRelation( + sqlContext, + paths, + Some(schema), + maybePartitionsSchema, + caseInsensitiveOptions) case dataSource: org.apache.spark.sql.sources.RelationProvider => throw new AnalysisException(s"$className does not allow user-specified schemas.") case _ => @@ -231,20 +256,39 @@ private[sql] object ResolvedDataSource { case None => clazz.newInstance() match { case dataSource: RelationProvider => dataSource.createRelation(sqlContext, new CaseInsensitiveMap(options)) + case dataSource: FSBasedRelationProvider => + val caseInsensitiveOptions = new CaseInsensitiveMap(options) + val paths = { + val patternPath = new Path(caseInsensitiveOptions("path")) + SparkHadoopUtil.get.globPath(patternPath).map(_.toString).toArray + } + dataSource.createRelation(sqlContext, paths, None, None, caseInsensitiveOptions) case dataSource: org.apache.spark.sql.sources.SchemaRelationProvider => throw new AnalysisException( s"A schema needs to be specified when using $className.") case _ => - throw new AnalysisException(s"$className is not a RelationProvider.") + throw new AnalysisException( + s"$className is neither a RelationProvider nor a FSBasedRelationProvider.") } } new ResolvedDataSource(clazz, relation) } + private def partitionColumnsSchema( + schema: StructType, + partitionColumns: Array[String]): StructType = { + StructType(partitionColumns.map { col => + schema.find(_.name == col).getOrElse { + throw new RuntimeException(s"Partition column $col not found in schema $schema") + } + }).asNullable + } + /** Create a [[ResolvedDataSource]] for saving the content of the given [[DataFrame]]. */ def apply( sqlContext: SQLContext, provider: String, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], data: DataFrame): ResolvedDataSource = { @@ -252,6 +296,31 @@ private[sql] object ResolvedDataSource { val relation = clazz.newInstance() match { case dataSource: CreatableRelationProvider => dataSource.createRelation(sqlContext, mode, options, data) + case dataSource: FSBasedRelationProvider => + // Don't glob path for the write path. The contracts here are: + // 1. Only one output path can be specified on the write path; + // 2. Output path must be a legal HDFS style file system path; + // 3. 
It's OK that the output path doesn't exist yet; + val caseInsensitiveOptions = new CaseInsensitiveMap(options) + val outputPath = { + val path = new Path(caseInsensitiveOptions("path")) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + path.makeQualified(fs.getUri, fs.getWorkingDirectory) + } + val dataSchema = StructType(data.schema.filterNot(f => partitionColumns.contains(f.name))) + val r = dataSource.createRelation( + sqlContext, + Array(outputPath.toString), + Some(dataSchema.asNullable), + Some(partitionColumnsSchema(data.schema, partitionColumns)), + caseInsensitiveOptions) + sqlContext.executePlan( + InsertIntoFSBasedRelation( + r, + data.logicalPlan, + partitionColumns.toArray, + mode)).toRdd + r case _ => sys.error(s"${clazz.getCanonicalName} does not allow create table as select.") } @@ -310,6 +379,7 @@ private[sql] case class CreateTableUsingAsSelect( tableName: String, provider: String, temporary: Boolean, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], child: LogicalPlan) extends UnaryNode { @@ -324,8 +394,9 @@ private[sql] case class CreateTempTableUsing( provider: String, options: Map[String, String]) extends RunnableCommand { - override def run(sqlContext: SQLContext): Seq[Row] = { - val resolved = ResolvedDataSource(sqlContext, userSpecifiedSchema, provider, options) + def run(sqlContext: SQLContext): Seq[Row] = { + val resolved = ResolvedDataSource( + sqlContext, userSpecifiedSchema, Array.empty[String], provider, options) sqlContext.registerDataFrameAsTable( DataFrame(sqlContext, LogicalRelation(resolved.relation)), tableName) Seq.empty @@ -335,13 +406,14 @@ private[sql] case class CreateTempTableUsing( private[sql] case class CreateTempTableUsingAsSelect( tableName: String, provider: String, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], query: LogicalPlan) extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { val df = DataFrame(sqlContext, query) - val resolved = ResolvedDataSource(sqlContext, provider, mode, options, df) + val resolved = ResolvedDataSource(sqlContext, provider, partitionColumns, mode, options, df) sqlContext.registerDataFrameAsTable( DataFrame(sqlContext, LogicalRelation(resolved.relation)), tableName) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index ca53dcdb92c52..5e010d21120f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -17,11 +17,19 @@ package org.apache.spark.sql.sources -import org.apache.spark.annotation.{Experimental, DeveloperApi} +import scala.util.Try + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} + +import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{SaveMode, DataFrame, Row, SQLContext} -import org.apache.spark.sql.catalyst.expressions.{Expression, Attribute} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{Row, _} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection +import org.apache.spark.sql.types.{StructField, StructType} /** * ::DeveloperApi:: @@ -78,6 
+86,41 @@ trait SchemaRelationProvider {
       schema: StructType): BaseRelation
 }
 
+/**
+ * ::DeveloperApi::
+ * Implemented by objects that produce relations for a specific kind of data source
+ * with a given schema and partition columns. When Spark SQL is given a DDL operation with a
+ * USING clause specified (to specify the implemented [[FSBasedRelationProvider]]), a user-defined
+ * schema, and an optional list of partition columns, this interface is used to pass in the
+ * parameters specified by a user.
+ *
+ * Users may specify the fully qualified class name of a given data source. When that class is
+ * not found, Spark SQL will append the class name `DefaultSource` to the path, allowing for
+ * less verbose invocation. For example, 'org.apache.spark.sql.json' would resolve to the
+ * data source 'org.apache.spark.sql.json.DefaultSource'.
+ *
+ * A new instance of this class will be instantiated each time a DDL call is made.
+ *
+ * The difference between a [[RelationProvider]] and a [[FSBasedRelationProvider]] is
+ * that users need to provide a schema and a (possibly empty) list of partition columns when
+ * using an [[FSBasedRelationProvider]]. A relation provider can inherit both [[RelationProvider]]
+ * and [[FSBasedRelationProvider]] if it can support schema inference, user-specified
+ * schemas, and accessing partitioned relations.
+ */
+trait FSBasedRelationProvider {
+  /**
+   * Returns a new base relation with the given parameters, a user-defined schema, and a list of
+   * partition columns. Note: the parameters' keywords are case insensitive and this insensitivity
+   * is enforced by the Map that is passed to the function.
+   */
+  def createRelation(
+      sqlContext: SQLContext,
+      paths: Array[String],
+      schema: Option[StructType],
+      partitionColumns: Option[StructType],
+      parameters: Map[String, String]): FSBasedRelation
+}
+
 @DeveloperApi
 trait CreatableRelationProvider {
   /**
@@ -207,3 +250,235 @@ trait InsertableRelation {
 trait CatalystScan {
   def buildScan(requiredColumns: Seq[Attribute], filters: Seq[Expression]): RDD[Row]
 }
+
+/**
+ * ::Experimental::
+ * [[OutputWriter]] is used together with [[FSBasedRelation]] for persisting rows to the
+ * underlying file system. Subclasses of [[OutputWriter]] must provide a zero-argument constructor.
+ * An [[OutputWriter]] instance is created and initialized when a new output file is opened on
+ * the executor side. This instance is used to persist rows to this single output file.
+ */
+@Experimental
+abstract class OutputWriter {
+  /**
+   * Initializes this [[OutputWriter]] before any rows are persisted.
+   *
+   * @param path Path of the file to which this [[OutputWriter]] is supposed to write. Note that
+   *        this may not point to the final output file. For example, `FileOutputFormat` writes to
+   *        temporary directories and then merges written files back to the final destination. In
+   *        this case, `path` points to a temporary output file under the temporary directory.
+   * @param dataSchema Schema of the rows to be written. Partition columns are not included in the
+   *        schema if the corresponding relation is partitioned.
+   * @param context The Hadoop MapReduce task context.
+   */
+  def init(
+      path: String,
+      dataSchema: StructType,
+      context: TaskAttemptContext): Unit = ()
+
+  /**
+   * Persists a single row. Invoked on the executor side. When writing to dynamically partitioned
+   * tables, dynamic partition columns are not included in the rows to be written.
+   */
+  def write(row: Row): Unit
+
+  /**
+   * Closes the [[OutputWriter]].
+   * Invoked on the executor side after all rows are persisted, before the task output is
+   * committed.
+   */
+  def close(): Unit
+}
+
+/**
+ * ::Experimental::
+ * A [[BaseRelation]] that provides much of the common code required for formats that store their
+ * data to an HDFS-compatible filesystem.
+ *
+ * For the read path, similar to [[PrunedFilteredScan]], it can eliminate unneeded columns and
+ * filter using selected predicates before producing an RDD containing all matching tuples as
+ * [[Row]] objects. In addition, when reading from Hive-style partitioned tables stored in file
+ * systems, it's able to discover partitioning information from the paths of input directories, and
+ * perform partition pruning before starting to read the data. Subclasses of [[FSBasedRelation]]
+ * must override one of the three `buildScan` methods to implement the read path.
+ *
+ * For the write path, it provides the ability to write to both non-partitioned and partitioned
+ * tables. Directory layout of the partitioned tables is compatible with Hive.
+ *
+ * @constructor This constructor is for internal use only. The [[PartitionSpec]] argument is for
+ *        implementing metastore table conversion.
+ * @param paths Base paths of this relation. For partitioned relations, it should be the root
+ *        directories of all partition directories.
+ * @param maybePartitionSpec An [[FSBasedRelation]] can be created with an optional
+ *        [[PartitionSpec]], so that partition discovery can be skipped.
+ */
+@Experimental
+abstract class FSBasedRelation private[sql](
+    val paths: Array[String],
+    maybePartitionSpec: Option[PartitionSpec])
+  extends BaseRelation {
+
+  /**
+   * Constructs an [[FSBasedRelation]].
+   *
+   * @param paths Base paths of this relation. For partitioned relations, it should be the root
+   *        directories of all partition directories.
+   * @param partitionColumns Partition columns of this relation.
+   */
+  def this(paths: Array[String], partitionColumns: StructType) =
+    this(paths, {
+      if (partitionColumns.isEmpty) None
+      else Some(PartitionSpec(partitionColumns, Array.empty[Partition]))
+    })
+
+  /**
+   * Constructs an [[FSBasedRelation]].
+   *
+   * @param paths Base paths of this relation. For partitioned relations, it should be the root
+   *        directories of all partition directories.
+   */
+  def this(paths: Array[String]) = this(paths, None)
+
+  private val hadoopConf = new Configuration(sqlContext.sparkContext.hadoopConfiguration)
+
+  private val codegenEnabled = sqlContext.conf.codegenEnabled
+
+  private var _partitionSpec: PartitionSpec = maybePartitionSpec.map { spec =>
+    spec.copy(partitionColumns = spec.partitionColumns.asNullable)
+  }.getOrElse {
+    if (sqlContext.conf.partitionDiscoveryEnabled()) {
+      discoverPartitions()
+    } else {
+      PartitionSpec(StructType(Nil), Array.empty[Partition])
+    }
+  }
+
+  private[sql] def partitionSpec: PartitionSpec = _partitionSpec
+
+  /**
+   * Partition columns. Note that they are always nullable.
+ */ + def partitionColumns: StructType = partitionSpec.partitionColumns + + private[sql] def refresh(): Unit = { + if (sqlContext.conf.partitionDiscoveryEnabled()) { + _partitionSpec = discoverPartitions() + } + } + + private def discoverPartitions(): PartitionSpec = { + val basePaths = paths.map(new Path(_)) + val leafDirs = basePaths.flatMap { path => + val fs = path.getFileSystem(hadoopConf) + Try(fs.getFileStatus(path.makeQualified(fs.getUri, fs.getWorkingDirectory))) + .filter(_.isDir) + .map(SparkHadoopUtil.get.listLeafDirStatuses(fs, _)) + .getOrElse(Seq.empty[FileStatus]) + }.map(_.getPath) + + if (leafDirs.nonEmpty) { + PartitioningUtils.parsePartitions(leafDirs, "__HIVE_DEFAULT_PARTITION__") + } else { + PartitionSpec(StructType(Array.empty[StructField]), Array.empty[Partition]) + } + } + + /** + * Schema of this relation. It consists of columns appearing in [[dataSchema]] and all partition + * columns not appearing in [[dataSchema]]. + */ + override lazy val schema: StructType = { + val dataSchemaColumnNames = dataSchema.map(_.name.toLowerCase).toSet + StructType(dataSchema ++ partitionSpec.partitionColumns.filterNot { column => + dataSchemaColumnNames.contains(column.name.toLowerCase) + }) + } + + /** + * Specifies schema of actual data files. For partitioned relations, if one or more partitioned + * columns are contained in the data files, they should also appear in `dataSchema`. + */ + def dataSchema: StructType + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + */ + def buildScan(inputPaths: Array[String]): RDD[Row] = { + throw new RuntimeException( + "At least one buildScan() method should be overridden to read the relation.") + } + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * @param requiredColumns Required columns. + * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + */ + def buildScan(requiredColumns: Array[String], inputPaths: Array[String]): RDD[Row] = { + // Yeah, to workaround serialization... + val dataSchema = this.dataSchema + val codegenEnabled = this.codegenEnabled + + val requiredOutput = requiredColumns.map { col => + val field = dataSchema(col) + BoundReference(dataSchema.fieldIndex(col), field.dataType, field.nullable) + }.toSeq + + buildScan(inputPaths).mapPartitions { rows => + val buildProjection = if (codegenEnabled) { + GenerateMutableProjection.generate(requiredOutput, dataSchema.toAttributes) + } else { + () => new InterpretedMutableProjection(requiredOutput, dataSchema.toAttributes) + } + + val mutableProjection = buildProjection() + rows.map(mutableProjection) + } + } + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. 
For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * @param requiredColumns Required columns. + * @param filters Candidate filters to be pushed down. The actual filter should be the conjunction + * of all `filters`. The pushed down filters are currently purely an optimization as they + * will all be evaluated again. This means it is safe to use them with methods that produce + * false positives such as filtering partitions based on a bloom filter. + * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + */ + def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputPaths: Array[String]): RDD[Row] = { + buildScan(requiredColumns, inputPaths) + } + + /** + * Client side preparation for data writing can be put here. For example, user defined output + * committer can be configured here. + * + * Note that the only side effect expected here is mutating `job` via its setters. Especially, + * Spark SQL caches [[BaseRelation]] instances for performance, mutating relation internal states + * may cause unexpected behaviors. + */ + def prepareForWrite(job: Job): Unit = () + + /** + * This method is responsible for producing a new [[OutputWriter]] for each newly opened output + * file on the executor side. + */ + def outputWriterClass: Class[_ <: OutputWriter] +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala index 6ed68d179edc9..aad1d248d0a28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala @@ -101,13 +101,13 @@ private[sql] case class PreWriteCheck(catalog: Catalog) extends (LogicalPlan => } } - case i @ logical.InsertIntoTable( - l: LogicalRelation, partition, query, overwrite, ifNotExists) - if !l.isInstanceOf[InsertableRelation] => + case logical.InsertIntoTable(LogicalRelation(_: InsertableRelation), _, _, _, _) => // OK + case logical.InsertIntoTable(LogicalRelation(_: FSBasedRelation), _, _, _, _) => // OK + case logical.InsertIntoTable(l: LogicalRelation, _, _, _, _) => // The relation in l is not an InsertableRelation. failAnalysis(s"$l does not allow insertion.") - case CreateTableUsingAsSelect(tableName, _, _, SaveMode.Overwrite, _, query) => + case CreateTableUsingAsSelect(tableName, _, _, _, SaveMode.Overwrite, _, query) => // When the SaveMode is Overwrite, we need to check if the table is an input table of // the query. If so, we will throw an AnalysisException to let users know it is not allowed. 
if (catalog.tableExists(Seq(tableName))) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala index b7561ce7298cb..bea568ed40049 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala @@ -21,7 +21,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.expressions.Literal -import org.apache.spark.sql.parquet.ParquetRelation2._ +import org.apache.spark.sql.sources.PartitioningUtils._ +import org.apache.spark.sql.sources.{Partition, PartitionSpec} import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.types._ import org.apache.spark.sql.{QueryTest, Row, SQLContext} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 54f2f3cdec298..4e54b2eb8df7a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.sources -import java.io.{IOException, File} +import java.io.{File, IOException} -import org.apache.spark.sql.AnalysisException import org.scalatest.BeforeAndAfterAll +import org.apache.spark.sql.AnalysisException import org.apache.spark.util.Utils class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index bbf48efb24440..d754c8e3a8aa1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -19,25 +19,24 @@ package org.apache.spark.sql.hive import com.google.common.base.Objects import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} - import org.apache.hadoop.fs.Path -import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.metastore.Warehouse +import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.ql.metadata._ import org.apache.hadoop.hive.serde2.Deserializer import org.apache.spark.Logging -import org.apache.spark.sql.{SaveMode, AnalysisException, SQLContext} -import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, Catalog, OverrideCatalog} +import org.apache.spark.sql.catalyst.analysis.{Catalog, MultiInstanceRelation, OverrideCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.hive.client._ -import org.apache.spark.sql.parquet.{ParquetRelation2, Partition => ParquetPartition, PartitionSpec} -import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, ResolvedDataSource} +import org.apache.spark.sql.parquet.ParquetRelation2 +import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, Partition => ParquetPartition, PartitionSpec, 
ResolvedDataSource} import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, SQLContext, SaveMode} import org.apache.spark.util.Utils /* Implicit conversions */ @@ -98,6 +97,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive ResolvedDataSource( hive, userSpecifiedSchema, + Array.empty[String], table.properties("spark.sql.sources.provider"), options) @@ -438,6 +438,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive desc.name, hive.conf.defaultDataSourceName, temporary = false, + Array.empty[String], mode, options = Map.empty[String, String], child diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index be9249a8b1f44..d46a127d47d31 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -221,14 +221,14 @@ private[hive] trait HiveStrategies { object HiveDDLStrategy extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case CreateTableUsing( - tableName, userSpecifiedSchema, provider, false, opts, allowExisting, managedIfNoPath) => + tableName, userSpecifiedSchema, provider, false, opts, allowExisting, managedIfNoPath) => ExecutedCommand( CreateMetastoreDataSource( tableName, userSpecifiedSchema, provider, opts, allowExisting, managedIfNoPath)) :: Nil - case CreateTableUsingAsSelect(tableName, provider, false, mode, opts, query) => + case CreateTableUsingAsSelect(tableName, provider, false, partitionCols, mode, opts, query) => val cmd = - CreateMetastoreDataSourceAsSelect(tableName, provider, mode, opts, query) + CreateMetastoreDataSourceAsSelect(tableName, provider, partitionCols, mode, opts, query) ExecutedCommand(cmd) :: Nil case _ => Nil diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index abab1a223a43a..8e405e080489f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -158,6 +158,7 @@ private[hive] case class CreateMetastoreDataSourceAsSelect( tableName: String, provider: String, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], query: LogicalPlan) extends RunnableCommand { @@ -189,12 +190,12 @@ case class CreateMetastoreDataSourceAsSelect( return Seq.empty[Row] case SaveMode.Append => // Check if the specified data source match the data source of the existing table. - val resolved = - ResolvedDataSource(sqlContext, Some(query.schema), provider, optionsWithPath) + val resolved = ResolvedDataSource( + sqlContext, Some(query.schema.asNullable), partitionColumns, provider, optionsWithPath) val createdRelation = LogicalRelation(resolved.relation) EliminateSubQueries(sqlContext.table(tableName).logicalPlan) match { - case l @ LogicalRelation(i: InsertableRelation) => - if (i != createdRelation.relation) { + case l @ LogicalRelation(_: InsertableRelation | _: FSBasedRelation) => + if (l.relation != createdRelation.relation) { val errorDescription = s"Cannot append to table $tableName because the resolved relation does not " + s"match the existing relation of $tableName. " + @@ -202,14 +203,13 @@ case class CreateMetastoreDataSourceAsSelect( s"table $tableName and using its data source and options." 
val errorMessage = s""" - |$errorDescription - |== Relations == - |${sideBySide( - s"== Expected Relation ==" :: - l.toString :: Nil, - s"== Actual Relation ==" :: - createdRelation.toString :: Nil).mkString("\n")} - """.stripMargin + |$errorDescription + |== Relations == + |${sideBySide( + s"== Expected Relation ==" :: l.toString :: Nil, + s"== Actual Relation ==" :: createdRelation.toString :: Nil + ).mkString("\n")} + """.stripMargin throw new AnalysisException(errorMessage) } existingSchema = Some(l.schema) @@ -234,7 +234,8 @@ case class CreateMetastoreDataSourceAsSelect( } // Create the relation based on the data of df. - val resolved = ResolvedDataSource(sqlContext, provider, mode, optionsWithPath, df) + val resolved = + ResolvedDataSource(sqlContext, provider, partitionColumns, mode, optionsWithPath, df) if (createMetastoreTable) { // We will use the schema of resolved.relation as the schema of the table (instead of diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala index 8398da268174d..cbc381cc81b59 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala @@ -204,7 +204,7 @@ private[spark] class SparkHiveDynamicPartitionWriterContainer( if (string == null || string.isEmpty) { defaultPartName } else { - FileUtils.escapePathName(string) + FileUtils.escapePathName(string, defaultPartName) } s"/$col=$colString" }.mkString diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala new file mode 100644 index 0000000000000..415b1cd168848 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala @@ -0,0 +1,525 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import org.apache.hadoop.fs.Path + +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql._ +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.parquet.ParquetTest +import org.apache.spark.sql.types._ + +// TODO Don't extend ParquetTest +// This test suite extends ParquetTest for some convenient utility methods. These methods should be +// moved to some more general places, maybe QueryTest. 
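+//
+// The SimpleTextSource used throughout this suite is assumed to be a test-only
+// FSBasedRelationProvider implementation defined alongside these tests; judging from the
+// "partition columns in data" test below, it appears to read and write comma-delimited text
+// files whose schema is supplied through the "dataSchema" option.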
+class FSBasedRelationSuite extends QueryTest with ParquetTest { + override val sqlContext: SQLContext = TestHive + + import sqlContext._ + import sqlContext.implicits._ + + val dataSchema = + StructType( + Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", StringType, nullable = false))) + + val testDF = (1 to 3).map(i => (i, s"val_$i")).toDF("a", "b") + + val partitionedTestDF1 = (for { + i <- 1 to 3 + p2 <- Seq("foo", "bar") + } yield (i, s"val_$i", 1, p2)).toDF("a", "b", "p1", "p2") + + val partitionedTestDF2 = (for { + i <- 1 to 3 + p2 <- Seq("foo", "bar") + } yield (i, s"val_$i", 2, p2)).toDF("a", "b", "p1", "p2") + + val partitionedTestDF = partitionedTestDF1.unionAll(partitionedTestDF2) + + def checkQueries(df: DataFrame): Unit = { + // Selects everything + checkAnswer( + df, + for (i <- 1 to 3; p1 <- 1 to 2; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", p1, p2)) + + // Simple filtering and partition pruning + checkAnswer( + df.filter('a > 1 && 'p1 === 2), + for (i <- 2 to 3; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", 2, p2)) + + // Simple projection and filtering + checkAnswer( + df.filter('a > 1).select('b, 'a + 1), + for (i <- 2 to 3; _ <- 1 to 2; _ <- Seq("foo", "bar")) yield Row(s"val_$i", i + 1)) + + // Simple projection and partition pruning + checkAnswer( + df.filter('a > 1 && 'p1 < 2).select('b, 'p1), + for (i <- 2 to 3; _ <- Seq("foo", "bar")) yield Row(s"val_$i", 1)) + + // Self-join + df.registerTempTable("t") + withTempTable("t") { + checkAnswer( + sql( + """SELECT l.a, r.b, l.p1, r.p2 + |FROM t l JOIN t r + |ON l.a = r.a AND l.p1 = r.p1 AND l.p2 = r.p2 + """.stripMargin), + for (i <- 1 to 3; p1 <- 1 to 2; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", p1, p2)) + } + } + + test("save()/load() - non-partitioned table - Overwrite") { + withTempPath { file => + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite) + + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)), + testDF.collect()) + } + } + + test("save()/load() - non-partitioned table - Append") { + withTempPath { file => + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite) + + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)).orderBy("a"), + testDF.unionAll(testDF).orderBy("a").collect()) + } + } + + test("save()/load() - non-partitioned table - ErrorIfExists") { + withTempDir { file => + intercept[RuntimeException] { + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists) + } + } + } + + test("save()/load() - non-partitioned table - Ignore") { + withTempDir { file => + testDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Ignore) + + val path = new Path(file.getCanonicalPath) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + assert(fs.listStatus(path).isEmpty) + } + } + + 
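+  // Illustrative sketch (hypothetical helper, not used by the tests): the partitioned-table tests
+  // below assume Hive-compatible directory layout, i.e. a write with
+  // partitionColumns = Seq("p1", "p2") produces directories such as <path>/p1=1/p2=foo. The path
+  // for a given assignment of partition values could be derived like this:
+  //   partitionDir(new Path(file.getCanonicalPath), Seq("p1" -> 1, "p2" -> "foo"))
+  private def partitionDir(base: Path, spec: Seq[(String, Any)]): Path =
+    spec.foldLeft(base) { (parent, colValue) =>
+      new Path(parent, s"${colValue._1}=${colValue._2}")
+    }
+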
test("save()/load() - partitioned table - simple queries") { + withTempPath { file => + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + checkQueries( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json))) + } + } + + test("save()/load() - partitioned table - simple queries - partition columns in data") { + withTempDir { file => + val basePath = new Path(file.getCanonicalPath) + val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf) + val qualifiedBasePath = fs.makeQualified(basePath) + + for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) { + val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2") + sparkContext + .parallelize(for (i <- 1 to 3) yield s"$i,val_$i,$p1") + .saveAsTextFile(partitionDir.toString) + } + + val dataSchemaWithPartition = + StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) + + checkQueries( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchemaWithPartition.json))) + } + } + + test("save()/load() - partitioned table - Overwrite") { + withTempPath { file => + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)), + partitionedTestDF.collect()) + } + } + + test("save()/load() - partitioned table - Append") { + withTempPath { file => + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)), + partitionedTestDF.unionAll(partitionedTestDF).collect()) + } + } + + test("save()/load() - partitioned table - Append - new partition values") { + withTempPath { file => + partitionedTestDF1.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF2.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + checkAnswer( + load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchema.json)), + partitionedTestDF.collect()) + } + } + + test("save()/load() - partitioned table - ErrorIfExists") { + withTempDir { file => + intercept[RuntimeException] { + partitionedTestDF.save( + 
source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + } + } + } + + test("save()/load() - partitioned table - Ignore") { + withTempDir { file => + partitionedTestDF.save( + path = file.getCanonicalPath, + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Ignore) + + val path = new Path(file.getCanonicalPath) + val fs = path.getFileSystem(SparkHadoopUtil.get.conf) + assert(fs.listStatus(path).isEmpty) + } + } + + def withTable(tableName: String)(f: => Unit): Unit = { + try f finally sql(s"DROP TABLE $tableName") + } + + test("saveAsTable()/load() - non-partitioned table - Overwrite") { + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + Map("dataSchema" -> dataSchema.json)) + + withTable("t") { + checkAnswer(table("t"), testDF.collect()) + } + } + + test("saveAsTable()/load() - non-partitioned table - Append") { + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite) + + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append) + + withTable("t") { + checkAnswer(table("t"), testDF.unionAll(testDF).orderBy("a").collect()) + } + } + + test("saveAsTable()/load() - non-partitioned table - ErrorIfExists") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + intercept[AnalysisException] { + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists) + } + } + } + + test("saveAsTable()/load() - non-partitioned table - Ignore") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + testDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Ignore) + + assert(table("t").collect().isEmpty) + } + } + + test("saveAsTable()/load() - partitioned table - simple queries") { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + Map("dataSchema" -> dataSchema.json)) + + withTable("t") { + checkQueries(table("t")) + } + } + + test("saveAsTable()/load() - partitioned table - Overwrite") { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + withTable("t") { + checkAnswer(table("t"), partitionedTestDF.collect()) + } + } + + test("saveAsTable()/load() - partitioned table - Append") { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + withTable("t") { + checkAnswer(table("t"), 
partitionedTestDF.unionAll(partitionedTestDF).collect()) + } + } + + test("saveAsTable()/load() - partitioned table - Append - new partition values") { + partitionedTestDF1.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + partitionedTestDF2.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + withTable("t") { + checkAnswer(table("t"), partitionedTestDF.collect()) + } + } + + test("saveAsTable()/load() - partitioned table - Append - mismatched partition columns") { + partitionedTestDF1.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + // Using only a subset of all partition columns + intercept[Throwable] { + partitionedTestDF2.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1")) + } + + // Using different order of partition columns + intercept[Throwable] { + partitionedTestDF2.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Append, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p2", "p1")) + } + } + + test("saveAsTable()/load() - partitioned table - ErrorIfExists") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + intercept[AnalysisException] { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.ErrorIfExists, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + } + } + } + + test("saveAsTable()/load() - partitioned table - Ignore") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + partitionedTestDF.saveAsTable( + tableName = "t", + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Ignore, + options = Map("dataSchema" -> dataSchema.json), + partitionColumns = Seq("p1", "p2")) + + assert(table("t").collect().isEmpty) + } + } + + test("Hadoop style globbing") { + withTempPath { file => + partitionedTestDF.save( + source = classOf[SimpleTextSource].getCanonicalName, + mode = SaveMode.Overwrite, + options = Map("path" -> file.getCanonicalPath), + partitionColumns = Seq("p1", "p2")) + + val df = load( + source = classOf[SimpleTextSource].getCanonicalName, + options = Map( + "path" -> s"${file.getCanonicalPath}/p1=*/p2=???", + "dataSchema" -> dataSchema.json)) + + val expectedPaths = Set( + s"${file.getCanonicalFile}/p1=1/p2=foo", + s"${file.getCanonicalFile}/p1=2/p2=foo", + s"${file.getCanonicalFile}/p1=1/p2=bar", + s"${file.getCanonicalFile}/p1=2/p2=bar" + ).map { p => + val path = new Path(p) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + path.makeQualified(fs.getUri, fs.getWorkingDirectory).toString + } + + println(df.queryExecution) + + val actualPaths = df.queryExecution.analyzed.collectFirst { + case LogicalRelation(relation: FSBasedRelation) => + relation.paths.toSet + }.getOrElse { + fail("Expect an FSBasedRelation, but none could be found") + } + + assert(actualPaths === 
expectedPaths) + checkAnswer(df, partitionedTestDF.collect()) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala new file mode 100644 index 0000000000000..8801aba2f64c3 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import java.text.NumberFormat +import java.util.UUID + +import com.google.common.base.Objects +import org.apache.hadoop.fs.Path +import org.apache.hadoop.io.{NullWritable, Text} +import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat, TextOutputFormat} +import org.apache.hadoop.mapreduce.{RecordWriter, TaskAttemptContext} + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} +import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.{Row, SQLContext} + +/** + * A simple example [[FSBasedRelationProvider]]. 
+ */ +class SimpleTextSource extends FSBasedRelationProvider { + override def createRelation( + sqlContext: SQLContext, + paths: Array[String], + schema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): FSBasedRelation = { + val partitionsSchema = partitionColumns.getOrElse(StructType(Array.empty[StructField])) + new SimpleTextRelation(paths, schema, partitionsSchema, parameters)(sqlContext) + } +} + +class AppendingTextOutputFormat(outputFile: Path) extends TextOutputFormat[NullWritable, Text] { + val numberFormat = NumberFormat.getInstance() + + numberFormat.setMinimumIntegerDigits(5) + numberFormat.setGroupingUsed(false) + + override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { + val split = context.getTaskAttemptID.getTaskID.getId + val name = FileOutputFormat.getOutputName(context) + new Path(outputFile, s"$name-${numberFormat.format(split)}-${UUID.randomUUID()}") + } +} + +class SimpleTextOutputWriter extends OutputWriter { + private var recordWriter: RecordWriter[NullWritable, Text] = _ + private var taskAttemptContext: TaskAttemptContext = _ + + override def init( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): Unit = { + recordWriter = new AppendingTextOutputFormat(new Path(path)).getRecordWriter(context) + taskAttemptContext = context + } + + override def write(row: Row): Unit = { + val serialized = row.toSeq.map(_.toString).mkString(",") + recordWriter.write(null, new Text(serialized)) + } + + override def close(): Unit = recordWriter.close(taskAttemptContext) +} + +/** + * A simple example [[FSBasedRelation]], used for testing purposes. Data are stored as comma + * separated string lines. When scanning data, schema must be explicitly provided via data source + * option `"dataSchema"`. + */ +class SimpleTextRelation( + paths: Array[String], + val maybeDataSchema: Option[StructType], + partitionsSchema: StructType, + parameters: Map[String, String])( + @transient val sqlContext: SQLContext) + extends FSBasedRelation(paths, partitionsSchema) { + + import sqlContext.sparkContext + + override val dataSchema: StructType = + maybeDataSchema.getOrElse(DataType.fromJson(parameters("dataSchema")).asInstanceOf[StructType]) + + override def equals(other: Any): Boolean = other match { + case that: SimpleTextRelation => + this.paths.sameElements(that.paths) && + this.maybeDataSchema == that.maybeDataSchema && + this.dataSchema == that.dataSchema && + this.partitionColumns == that.partitionColumns + + case _ => false + } + + override def hashCode(): Int = + Objects.hashCode(paths, maybeDataSchema, dataSchema) + + override def outputWriterClass: Class[_ <: OutputWriter] = + classOf[SimpleTextOutputWriter] + + override def buildScan(inputPaths: Array[String]): RDD[Row] = { + val fields = dataSchema.map(_.dataType) + + sparkContext.textFile(inputPaths.mkString(",")).map { record => + Row(record.split(",").zip(fields).map { case (value, dataType) => + Cast(Literal(value), dataType).eval() + }: _*) + } + } +} From bfcaf8adcdc20dec203e2e9d5a72b52dd6f226a9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Tue, 12 May 2015 10:37:57 -0700 Subject: [PATCH 110/320] [DataFrame][minor] support column in field accessor Minor improvement, now we can use `Column` as extraction expression. 
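A rough sketch of what this enables (the DataFrame below is a made-up fixture mirroring the `complexData` case asserted in DataFrameSuite): since `apply` now wraps its argument with `lit(extraction).expr`, another `Column` can serve as the extraction key, not just a literal.

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLContext

    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("column-extraction-sketch"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // A map column "m" and a struct column "s" with fields _1 and _2.
    val df = Seq((Map("a" -> 1, "b" -> 2), ("b", 10))).toDF("m", "s")

    df.select(df("m")("a"))            // literal extraction key, already supported
    df.select(df("m")(df("s")("_1")))  // Column as the key: looks up "m" under the value of s._1, i.e. "b"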
Author: Wenchen Fan <cloud0fan@outlook.com> Closes #6080 from cloud-fan/tmp and squashes the following commits: 0fdefb7 [Wenchen Fan] support column in field accessor --- sql/core/src/main/scala/org/apache/spark/sql/Column.scala | 2 +- .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index e6e475bb82f82..4d50821620f5e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -78,7 +78,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * * @group expr_ops */ - def apply(field: Any): Column = UnresolvedExtractValue(expr, Literal(field)) + def apply(extraction: Any): Column = UnresolvedExtractValue(expr, lit(extraction).expr) /** * Unary minus, i.e. negate the expression. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 2ade955864b71..d58438e5d129c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -456,6 +456,7 @@ class DataFrameSuite extends QueryTest { assert(complexData.filter(complexData("a")(0) === 2).count() == 1) assert(complexData.filter(complexData("m")("1") === 1).count() == 1) assert(complexData.filter(complexData("s")("key") === 1).count() == 1) + assert(complexData.filter(complexData("m")(complexData("s")("value")) === 1).count() == 1) } test("SPARK-7324 dropDuplicates") { From 65697bbeafe507dda066e2dc14ca5183f278dfe9 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Tue, 12 May 2015 11:17:59 -0700 Subject: [PATCH 111/320] [SPARK-7500] DAG visualization: move cluster labeling to dagre-d3 This fixes the label bleeding issue described in the JIRA and pictured in the screenshots below. I also took the opportunity to move some code to the places that they belong more to. In particular: (1) Drawing cluster labels is now implemented in my branch of dagre-d3 instead of in Spark (2) All graph styling is now moved from Scala to JS Note that these changes are related because our existing mechanism of "tacking on cluster labels" afterwards isn't flexible enough for us to fix issues like this one easily. For the other half of the changes, visit http://github.com/andrewor14/dagre-d3. 
------------------- **Before.** <img src="https://cloud.githubusercontent.com/assets/2133137/7582769/b1423440-f845-11e4-8248-b3446a01bf79.png" width="300px"/> ------------------- **After.** <img src="https://cloud.githubusercontent.com/assets/2133137/7582742/74891ae6-f845-11e4-96c4-41c7b8aedbdf.png" width="400px"/> Author: Andrew Or <andrew@databricks.com> Closes #6076 from andrewor14/dag-viz-bleed and squashes the following commits: 5858d7a [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-bleed c686dc4 [Andrew Or] Fix tooltip placement d908c36 [Andrew Or] Add link to dagre-d3 changes (minor) 4a4fb58 [Andrew Or] Fix bleeding + move all styling to JS --- .../apache/spark/ui/static/dagre-d3.min.js | 14 ++-- .../apache/spark/ui/static/spark-dag-viz.js | 79 ++++++++----------- .../scala/org/apache/spark/ui/UIUtils.scala | 2 +- .../spark/ui/scope/RDDOperationGraph.scala | 40 +++------- 4 files changed, 48 insertions(+), 87 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js index 6beb7db855ca0..acf2d93b718b2 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js +++ b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js @@ -1,4 +1,4 @@ -!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var f;"undefined"!=typeof window?f=window:"undefined"!=typeof global?f=global:"undefined"!=typeof self&&(f=self),f.dagreD3=e()}}(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s}({1:[function(require,module,exports){/** +/* This is a custom version of dagre-d3 on top of v0.4.3. The full list of commits can be found at http://github.com/andrewor14/dagre-d3/ */!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var f;"undefined"!=typeof window?f=window:"undefined"!=typeof global?f=global:"undefined"!=typeof self&&(f=self),f.dagreD3=e()}}(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s}({1:[function(require,module,exports){/** * @license * Copyright (c) 2012-2013 Chris Pettitt * @@ -20,12 +20,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -module.exports={graphlib:require("./lib/graphlib"),dagre:require("./lib/dagre"),intersect:require("./lib/intersect"),render:require("./lib/render"),util:require("./lib/util"),version:require("./lib/version")}},{"./lib/dagre":8,"./lib/graphlib":9,"./lib/intersect":10,"./lib/render":23,"./lib/util":25,"./lib/version":26}],2:[function(require,module,exports){var util=require("./util");module.exports={"default":normal,normal:normal,vee:vee,undirected:undirected};function normal(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function vee(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 L 4 5 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function undirected(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 5 L 10 5").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}},{"./util":25}],3:[function(require,module,exports){var util=require("./util");module.exports=createClusters;function createClusters(selection,g){var clusters=g.nodes().filter(function(v){return util.isSubgraph(g,v)}),svgClusters=selection.selectAll("g.cluster").data(clusters,function(v){return v});svgClusters.enter().append("g").attr("id",function(v){return"cluster_"+v.replace(/^cluster/,"")}).attr("name",function(v){return g.node(v).label}).attr("class","cluster").style("opacity",0).append("rect");util.applyTransition(svgClusters.exit(),g).style("opacity",0).remove();util.applyTransition(svgClusters,g).style("opacity",1);util.applyTransition(svgClusters.selectAll("rect"),g).attr("width",function(v){var node=g.node(v);return node.width+node.paddingLeft+node.paddingRight}).attr("height",function(v){var node=g.node(v);return node.height+node.paddingTop+node.paddingBottom}).attr("x",function(v){var node=g.node(v);return node.x-node.width/2-node.paddingLeft}).attr("y",function(v){var node=g.node(v);return node.y-node.height/2-node.paddingTop})}},{"./util":25}],4:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createEdgeLabels;function createEdgeLabels(selection,g){var svgEdgeLabels=selection.selectAll("g.edgeLabel").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);svgEdgeLabels.selectAll("*").remove();svgEdgeLabels.enter().append("g").classed("edgeLabel",true).style("opacity",0);svgEdgeLabels.each(function(e){var 
edge=g.edge(e),label=addLabel(d3.select(this),g.edge(e),0,0).classed("label",true),bbox=label.node().getBBox();if(edge.labelId){label.attr("id",edge.labelId)}if(!_.has(edge,"width")){edge.width=bbox.width}if(!_.has(edge,"height")){edge.height=bbox.height}});util.applyTransition(svgEdgeLabels.exit(),g).style("opacity",0).remove();return svgEdgeLabels}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],5:[function(require,module,exports){"use strict";var _=require("./lodash"),intersectNode=require("./intersect/intersect-node"),util=require("./util"),d3=require("./d3");module.exports=createEdgePaths;function createEdgePaths(selection,g,arrows){var svgPaths=selection.selectAll("g.edgePath").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);enter(svgPaths,g);exit(svgPaths,g);util.applyTransition(svgPaths,g).style("opacity",1);svgPaths.each(function(e){var domEdge=d3.select(this);var edge=g.edge(e);edge.elem=this;if(edge.id){domEdge.attr("id",edge.id)}util.applyClass(domEdge,edge["class"],(domEdge.classed("update")?"update ":"")+"edgePath")});svgPaths.selectAll("path.path").each(function(e){var edge=g.edge(e);edge.arrowheadId=_.uniqueId("arrowhead");var domEdge=d3.select(this).attr("marker-end",function(){return"url(#"+edge.arrowheadId+")"}).style("fill","none");util.applyTransition(domEdge,g).attr("d",function(e){return calcPoints(g,e)});util.applyStyle(domEdge,edge.style)});svgPaths.selectAll("defs *").remove();svgPaths.selectAll("defs").each(function(e){var edge=g.edge(e),arrowhead=arrows[edge.arrowhead];arrowhead(d3.select(this),edge.arrowheadId,edge,"arrowhead")});return svgPaths}function calcPoints(g,e){var edge=g.edge(e),tail=g.node(e.v),head=g.node(e.w),points=edge.points.slice(1,edge.points.length-1);points.unshift(intersectNode(tail,points[0]));points.push(intersectNode(head,points[points.length-1]));return createLine(edge,points)}function createLine(edge,points){var line=d3.svg.line().x(function(d){return d.x}).y(function(d){return d.y});if(_.has(edge,"lineInterpolate")){line.interpolate(edge.lineInterpolate)}if(_.has(edge,"lineTension")){line.tension(Number(edge.lineTension))}return line(points)}function getCoords(elem){var bbox=elem.getBBox(),matrix=elem.getTransformToElement(elem.ownerSVGElement).translate(bbox.width/2,bbox.height/2);return{x:matrix.e,y:matrix.f}}function enter(svgPaths,g){var svgPathsEnter=svgPaths.enter().append("g").attr("class","edgePath").style("opacity",0);svgPathsEnter.append("path").attr("class","path").attr("d",function(e){var edge=g.edge(e),sourceElem=g.node(e.v).elem,points=_.range(edge.points.length).map(function(){return getCoords(sourceElem)});return createLine(edge,points)});svgPathsEnter.append("defs")}function exit(svgPaths,g){var svgPathExit=svgPaths.exit();util.applyTransition(svgPathExit,g).style("opacity",0).remove();util.applyTransition(svgPathExit.select("path.path"),g).attr("d",function(e){var source=g.node(e.v);if(source){var points=_.range(this.pathSegList.length).map(function(){return source});return createLine({},points)}else{return d3.select(this).attr("d")}})}},{"./d3":7,"./intersect/intersect-node":14,"./lodash":20,"./util":25}],6:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createNodes;function createNodes(selection,g,shapes){var simpleNodes=g.nodes().filter(function(v){return!util.isSubgraph(g,v)});var svgNodes=selection.selectAll("g.node").data(simpleNodes,function(v){return 
v}).classed("update",true);svgNodes.selectAll("*").remove();svgNodes.enter().append("g").attr("id",function(v){return"node_"+v}).attr("name",function(v){return g.node(v).label}).attr("class","node").style("opacity",0);svgNodes.each(function(v){var node=g.node(v),thisGroup=d3.select(this),labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),shape=shapes[node.shape],bbox=_.pick(labelDom.node().getBBox(),"width","height");node.elem=this;if(node.id){thisGroup.attr("id",node.id)}if(node.labelId){labelGroup.attr("id",node.labelId)}util.applyClass(thisGroup,node["class"],(thisGroup.classed("update")?"update ":"")+"node");if(_.has(node,"width")){bbox.width=node.width}if(_.has(node,"height")){bbox.height=node.height}bbox.width+=node.paddingLeft+node.paddingRight;bbox.height+=node.paddingTop+node.paddingBottom;labelGroup.attr("transform","translate("+(node.paddingLeft-node.paddingRight)/2+","+(node.paddingTop-node.paddingBottom)/2+")");var shapeSvg=shape(d3.select(this),bbox,node);util.applyStyle(shapeSvg,node.style);var shapeBBox=shapeSvg.node().getBBox();node.width=shapeBBox.width;node.height=shapeBBox.height});util.applyTransition(svgNodes.exit(),g).style("opacity",0).remove();return svgNodes}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],7:[function(require,module,exports){module.exports=window.d3},{}],8:[function(require,module,exports){var dagre;if(require){try{dagre=require("dagre")}catch(e){}}if(!dagre){dagre=window.dagre}module.exports=dagre},{dagre:27}],9:[function(require,module,exports){var graphlib;if(require){try{graphlib=require("graphlib")}catch(e){}}if(!graphlib){graphlib=window.graphlib}module.exports=graphlib},{graphlib:57}],10:[function(require,module,exports){module.exports={node:require("./intersect-node"),circle:require("./intersect-circle"),ellipse:require("./intersect-ellipse"),polygon:require("./intersect-polygon"),rect:require("./intersect-rect")}},{"./intersect-circle":11,"./intersect-ellipse":12,"./intersect-node":14,"./intersect-polygon":15,"./intersect-rect":16}],11:[function(require,module,exports){var intersectEllipse=require("./intersect-ellipse");module.exports=intersectCircle;function intersectCircle(node,rx,point){return intersectEllipse(node,rx,rx,point)}},{"./intersect-ellipse":12}],12:[function(require,module,exports){module.exports=intersectEllipse;function intersectEllipse(node,rx,ry,point){var cx=node.x;var cy=node.y;var px=cx-point.x;var py=cy-point.y;var det=Math.sqrt(rx*rx*py*py+ry*ry*px*px);var dx=Math.abs(rx*ry*px/det);if(point.x<cx){dx=-dx}var dy=Math.abs(rx*ry*py/det);if(point.y<cy){dy=-dy}return{x:cx+dx,y:cy+dy}}},{}],13:[function(require,module,exports){module.exports=intersectLine;function intersectLine(p1,p2,q1,q2){var a1,a2,b1,b2,c1,c2;var r1,r2,r3,r4;var denom,offset,num;var x,y;a1=p2.y-p1.y;b1=p1.x-p2.x;c1=p2.x*p1.y-p1.x*p2.y;r3=a1*q1.x+b1*q1.y+c1;r4=a1*q2.x+b1*q2.y+c1;if(r3!==0&&r4!==0&&sameSign(r3,r4)){return}a2=q2.y-q1.y;b2=q1.x-q2.x;c2=q2.x*q1.y-q1.x*q2.y;r1=a2*p1.x+b2*p1.yy+c2;r2=a2*p2.x+b2*p2.y+c2;if(r1!==0&&r2!==0&&sameSign(r1,r2)){return}denom=a1*b2-a2*b1;if(denom===0){return}offset=Math.abs(denom/2);num=b1*c2-b2*c1;x=num<0?(num-offset)/denom:(num+offset)/denom;num=a2*c1-a1*c2;y=num<0?(num-offset)/denom:(num+offset)/denom;return{x:x,y:y}}function sameSign(r1,r2){return r1*r2>0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var 
intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i<polyPoints.length;i++){var p1=polyPoints[i];var p2=polyPoints[i<polyPoints.length-1?i+1:0];var intersect=intersectLine(node,point,{x:left+p1.x,y:top+p1.y},{x:left+p2.x,y:top+p2.y});if(intersect){intersections.push(intersect)}}if(!intersections.length){console.log("NO INTERSECTION FOUND, RETURN NODE CENTER",node);return node}if(intersections.length>1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distp<distq?-1:distp===distq?0:1})}return intersections[0]}},{"./intersect-line":13}],16:[function(require,module,exports){module.exports=intersectRect;function intersectRect(node,point){var x=node.x;var y=node.y;var dx=point.x-x;var dy=point.y-y;var w=node.width/2;var h=node.height/2;var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;i<lines.length;i++){domNode.append("tspan").attr("xml:space","preserve").attr("dy","1em").attr("x","1").text(lines[i])}util.applyStyle(domNode,node.labelStyle);return domNode}function processEscapeSequences(text){var newText="",escaped=false,ch;for(var i=0;i<text.length;++i){ch=text[i];if(escaped){switch(ch){case"n":newText+="\n";break;default:newText+=ch}escaped=false}else if(ch==="\\"){escaped=true}else{newText+=ch}}return newText}},{"../util":25}],20:[function(require,module,exports){var lodash;if(require){try{lodash=require("lodash")}catch(e){}}if(!lodash){lodash=window._}module.exports=lodash},{lodash:77}],21:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3"),_=require("./lodash");module.exports=positionEdgeLabels;function 
positionEdgeLabels(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(e){var edge=g.edge(e);return _.has(edge,"x")?"translate("+edge.x+","+edge.y+")":""}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./lodash":20,"./util":25}],22:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3");module.exports=positionNodes;function positionNodes(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(v){var node=g.node(v);return"translate("+node.x+","+node.y+")"}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./util":25}],23:[function(require,module,exports){var _=require("./lodash"),layout=require("./dagre").layout;module.exports=render;function render(){var createNodes=require("./create-nodes"),createClusters=require("./create-clusters"),createEdgeLabels=require("./create-edge-labels"),createEdgePaths=require("./create-edge-paths"),positionNodes=require("./position-nodes"),positionEdgeLabels=require("./position-edge-labels"),shapes=require("./shapes"),arrows=require("./arrows");var fn=function(svg,g){preProcessGraph(g);var outputGroup=createOrSelectGroup(svg,"output"),clustersGroup=createOrSelectGroup(outputGroup,"clusters"),edgePathsGroup=createOrSelectGroup(outputGroup,"edgePaths"),edgeLabels=createEdgeLabels(createOrSelectGroup(outputGroup,"edgeLabels"),g),nodes=createNodes(createOrSelectGroup(outputGroup,"nodes"),g,shapes);layout(g);positionNodes(nodes,g);positionEdgeLabels(edgeLabels,g);createEdgePaths(edgePathsGroup,g,arrows);createClusters(clustersGroup,g);postProcessGraph(g)};fn.createNodes=function(value){if(!arguments.length)return createNodes;createNodes=value;return fn};fn.createClusters=function(value){if(!arguments.length)return createClusters;createClusters=value;return fn};fn.createEdgeLabels=function(value){if(!arguments.length)return createEdgeLabels;createEdgeLabels=value;return fn};fn.createEdgePaths=function(value){if(!arguments.length)return createEdgePaths;createEdgePaths=value;return fn};fn.shapes=function(value){if(!arguments.length)return shapes;shapes=value;return fn};fn.arrows=function(value){if(!arguments.length)return arrows;arrows=value;return fn};return fn}var NODE_DEFAULT_ATTRS={paddingLeft:0,paddingRight:0,paddingTop:0,paddingBottom:0,rx:0,ry:0,shape:"rect"};var EDGE_DEFAULT_ATTRS={arrowhead:"normal",lineInterpolate:"linear"};function preProcessGraph(g){g.nodes().forEach(function(v){var 
node=g.node(v);if(!_.has(node,"label")){node.label=v}if(_.has(node,"paddingX")){_.defaults(node,{paddingLeft:node.paddingX,paddingRight:node.paddingX})}if(_.has(node,"paddingY")){_.defaults(node,{paddingTop:node.paddingY,paddingBottom:node.paddingY})}if(_.has(node,"padding")){_.defaults(node,{paddingLeft:node.padding,paddingRight:node.padding,paddingTop:node.padding,paddingBottom:node.padding})}if(_.has(node,"paddingLeft")){_.defaults(node,{paddingLeft:node.paddingLeft})}if(_.has(node,"paddingRight")){_.defaults(node,{paddingRight:node.paddingRight})}if(_.has(node,"paddingTop")){_.defaults(node,{paddingTop:node.paddingTop})}if(_.has(node,"paddingBottom")){_.defaults(node,{paddingBottom:node.paddingBottom})}_.defaults(node,NODE_DEFAULT_ATTRS);_.each(["paddingLeft","paddingRight","paddingTop","paddingBottom"],function(k){node[k]=Number(node[k])});if(_.has(node,"width")){node._prevWidth=node.width}if(_.has(node,"height")){node._prevHeight=node.height}});g.edges().forEach(function(e){var edge=g.edge(e);if(!_.has(edge,"label")){edge.label=""}_.defaults(edge,EDGE_DEFAULT_ATTRS)})}function postProcessGraph(g){_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"_prevWidth")){node.width=node._prevWidth}else{delete node.width}if(_.has(node,"_prevHeight")){node.height=node._prevHeight}else{delete node.height}delete node._prevWidth;delete node._prevHeight})}function createOrSelectGroup(root,name){var selection=root.select("g."+name);if(selection.empty()){selection=root.append("g").attr("class",name)}return selection}},{"./arrows":2,"./create-clusters":3,"./create-edge-labels":4,"./create-edge-paths":5,"./create-nodes":6,"./dagre":8,"./lodash":20,"./position-edge-labels":21,"./position-nodes":22,"./shapes":24}],24:[function(require,module,exports){"use strict";var intersectRect=require("./intersect/intersect-rect"),intersectEllipse=require("./intersect/intersect-ellipse"),intersectCircle=require("./intersect/intersect-circle"),intersectPolygon=require("./intersect/intersect-polygon");module.exports={rect:rect,ellipse:ellipse,circle:circle,diamond:diamond};function rect(parent,bbox,node){var shapeSvg=parent.insert("rect",":first-child").attr("rx",node.rx).attr("ry",node.ry).attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("width",bbox.width).attr("height",bbox.height);node.intersect=function(point){return intersectRect(node,point)};return shapeSvg}function ellipse(parent,bbox,node){var rx=bbox.width/2,ry=bbox.height/2,shapeSvg=parent.insert("ellipse",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("rx",rx).attr("ry",ry);node.intersect=function(point){return intersectEllipse(node,rx,ry,point)};return shapeSvg}function circle(parent,bbox,node){var r=Math.max(bbox.width,bbox.height)/2,shapeSvg=parent.insert("circle",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("r",r);node.intersect=function(point){return intersectCircle(node,r,point)};return shapeSvg}function diamond(parent,bbox,node){var w=bbox.width*Math.SQRT2/2,h=bbox.height*Math.SQRT2/2,points=[{x:0,y:-h},{x:-w,y:0},{x:0,y:h},{x:w,y:0}],shapeSvg=parent.insert("polygon",":first-child").attr("points",points.map(function(p){return p.x+","+p.y}).join(" "));node.intersect=function(p){return intersectPolygon(node,points,p)};return shapeSvg}},{"./intersect/intersect-circle":11,"./intersect/intersect-ellipse":12,"./intersect/intersect-polygon":15,"./intersect/intersect-rect":16}],25:[function(require,module,exports){var 
_=require("./lodash");module.exports={isSubgraph:isSubgraph,edgeToId:edgeToId,applyStyle:applyStyle,applyClass:applyClass,applyTransition:applyTransition};function isSubgraph(g,v){return!!g.children(v).length}function edgeToId(e){return escapeId(e.v)+":"+escapeId(e.w)+":"+escapeId(e.name)}var ID_DELIM=/:/g;function escapeId(str){return str?String(str).replace(ID_DELIM,"\\:"):""}function applyStyle(dom,styleFn){if(styleFn){dom.attr("style",styleFn)}}function applyClass(dom,classFn,otherClasses){if(classFn){dom.attr("class",classFn).attr("class",otherClasses+" "+dom.attr("class"))}}function applyTransition(selection,g){var graph=g.graph();if(_.isPlainObject(graph)){var transition=graph.transition;if(_.isFunction(transition)){return transition(selection)}}return selection}},{"./lodash":20}],26:[function(require,module,exports){module.exports="0.4.4-pre"},{}],27:[function(require,module,exports){module.exports={graphlib:require("./lib/graphlib"),layout:require("./lib/layout"),debug:require("./lib/debug"),util:{time:require("./lib/util").time,notime:require("./lib/util").notime},version:require("./lib/version")}},{"./lib/debug":32,"./lib/graphlib":33,"./lib/layout":35,"./lib/util":55,"./lib/version":56}],28:[function(require,module,exports){"use strict";var _=require("./lodash"),greedyFAS=require("./greedy-fas");module.exports={run:run,undo:undo};function run(g){var fas=g.graph().acyclicer==="greedy"?greedyFAS(g,weightFn(g)):dfsFAS(g);_.each(fas,function(e){var label=g.edge(e);g.removeEdge(e);label.forwardName=e.name;label.reversed=true;g.setEdge(e.w,e.v,label,_.uniqueId("rev"))});function weightFn(g){return function(e){return g.edge(e).weight}}}function dfsFAS(g){var fas=[],stack={},visited={};function dfs(v){if(_.has(visited,v)){return}visited[v]=true;stack[v]=true;_.each(g.outEdges(v),function(e){if(_.has(stack,e.w)){fas.push(e)}else{dfs(e.w)}});delete stack[v]}_.each(g.nodes(),dfs);return fas}function undo(g){_.each(g.edges(),function(e){var label=g.edge(e);if(label.reversed){g.removeEdge(e);var forwardName=label.forwardName;delete label.reversed;delete label.forwardName;g.setEdge(e.w,e.v,label,forwardName)}})}},{"./greedy-fas":34,"./lodash":36}],29:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports=addBorderSegments;function addBorderSegments(g){function dfs(v){var children=g.children(v),node=g.node(v);if(children.length){_.each(children,dfs)}if(_.has(node,"minRank")){node.borderLeft=[];node.borderRight=[];for(var rank=node.minRank,maxRank=node.maxRank+1;rank<maxRank;++rank){addBorderNode(g,"borderLeft","_bl",v,node,rank);addBorderNode(g,"borderRight","_br",v,node,rank)}}}_.each(g.children(),dfs)}function addBorderNode(g,prop,prefix,sg,sgNode,rank){var label={width:0,height:0,rank:rank},prev=sgNode[prop][rank-1],curr=util.addDummyNode(g,"border",label,prefix);sgNode[prop][rank]=curr;g.setParent(curr,sg);if(prev){g.setEdge(prev,curr,{weight:1})}}},{"./lodash":36,"./util":55}],30:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports={adjust:adjust,undo:undo};function adjust(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="lr"||rankDir==="rl"){swapWidthHeight(g)}}function undo(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="bt"||rankDir==="rl"){reverseY(g)}if(rankDir==="lr"||rankDir==="rl"){swapXY(g);swapWidthHeight(g)}}function swapWidthHeight(g){_.each(g.nodes(),function(v){swapWidthHeightOne(g.node(v))});_.each(g.edges(),function(e){swapWidthHeightOne(g.edge(e))})}function 
swapWidthHeightOne(attrs){var w=attrs.width;attrs.width=attrs.height;attrs.height=w}function reverseY(g){_.each(g.nodes(),function(v){reverseYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,reverseYOne);if(_.has(edge,"y")){reverseYOne(edge)}})}function reverseYOne(attrs){attrs.y=-attrs.y}function swapXY(g){_.each(g.nodes(),function(v){swapXYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,swapXYOne);if(_.has(edge,"x")){swapXYOne(edge)}})}function swapXYOne(attrs){var x=attrs.x;attrs.x=attrs.y;attrs.y=x}},{"./lodash":36}],31:[function(require,module,exports){module.exports=List;function List(){var sentinel={};sentinel._next=sentinel._prev=sentinel;this._sentinel=sentinel}List.prototype.dequeue=function(){var sentinel=this._sentinel,entry=sentinel._prev;if(entry!==sentinel){unlink(entry);return entry}};List.prototype.enqueue=function(entry){var sentinel=this._sentinel;if(entry._prev&&entry._next){unlink(entry)}entry._next=sentinel._next;sentinel._next._prev=entry;sentinel._next=entry;entry._prev=sentinel};List.prototype.toString=function(){var strs=[],sentinel=this._sentinel,curr=sentinel._prev;while(curr!==sentinel){strs.push(JSON.stringify(curr,filterOutLinks));curr=curr._prev}return"["+strs.join(", ")+"]"};function unlink(entry){entry._prev._next=entry._next;entry._next._prev=entry._prev;delete entry._next;delete entry._prev}function filterOutLinks(k,v){if(k!=="_next"&&k!=="_prev"){return v}}},{}],32:[function(require,module,exports){var _=require("./lodash"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports={debugOrdering:debugOrdering};function debugOrdering(g){var layerMatrix=util.buildLayerMatrix(g);var h=new Graph({compound:true,multigraph:true}).setGraph({});_.each(g.nodes(),function(v){h.setNode(v,{label:v});h.setParent(v,"layer"+g.node(v).rank)});_.each(g.edges(),function(e){h.setEdge(e.v,e.w,{},e.name)});_.each(layerMatrix,function(layer,i){var layerV="layer"+i;h.setNode(layerV,{rank:"same"});_.reduce(layer,function(u,v){h.setEdge(u,v,{style:"invis"});return v})});return h}},{"./graphlib":33,"./lodash":36,"./util":55}],33:[function(require,module,exports){module.exports=require(9)},{"/Users/andrew/Documents/dev/dagre-d3/lib/graphlib.js":9,graphlib:57}],34:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graphlib").Graph,List=require("./data/list");module.exports=greedyFAS;var DEFAULT_WEIGHT_FN=_.constant(1);function greedyFAS(g,weightFn){if(g.nodeCount()<=1){return[]}var state=buildState(g,weightFn||DEFAULT_WEIGHT_FN);var results=doGreedyFAS(state.graph,state.buckets,state.zeroIdx);return _.flatten(_.map(results,function(e){return g.outEdges(e.v,e.w)}),true)}function doGreedyFAS(g,buckets,zeroIdx){var results=[],sources=buckets[buckets.length-1],sinks=buckets[0];var entry;while(g.nodeCount()){while(entry=sinks.dequeue()){removeNode(g,buckets,zeroIdx,entry)}while(entry=sources.dequeue()){removeNode(g,buckets,zeroIdx,entry)}if(g.nodeCount()){for(var i=buckets.length-2;i>0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var 
weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){parentDummyChains(g)});time(" addBorderSegments",function(){addBorderSegments(g)});time(" order",function(){order(g)});time(" insertSelfEdges",function(){insertSelfEdges(g)});time(" adjustCoordinateSystem",function(){coordinateSystem.adjust(g)});time(" position",function(){position(g)});time(" positionSelfEdges",function(){positionSelfEdges(g)});time(" removeBorderNodes",function(){removeBorderNodes(g)});time(" normalize.undo",function(){normalize.undo(g)});time(" fixupEdgeLabelCoords",function(){fixupEdgeLabelCoords(g)});time(" undoCoordinateSystem",function(){coordinateSystem.undo(g)});time(" translateGraph",function(){translateGraph(g)});time(" assignNodeIntersects",function(){assignNodeIntersects(g)});time(" reversePoints",function(){reversePointsForReversedEdges(g)});time(" 
acyclic.undo",function(){acyclic.undo(g)})}function updateInputGraph(inputGraph,layoutGraph){_.each(inputGraph.nodes(),function(v){var inputLabel=inputGraph.node(v),layoutLabel=layoutGraph.node(v);if(inputLabel){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y;if(layoutGraph.children(v).length){inputLabel.width=layoutLabel.width;inputLabel.height=layoutLabel.height}}});_.each(inputGraph.edges(),function(e){var inputLabel=inputGraph.edge(e),layoutLabel=layoutGraph.edge(e);inputLabel.points=layoutLabel.points;if(_.has(layoutLabel,"x")){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y}});inputGraph.graph().width=layoutGraph.graph().width;inputGraph.graph().height=layoutGraph.graph().height}var graphNumAttrs=["nodesep","edgesep","ranksep","marginx","marginy"],graphDefaults={ranksep:50,edgesep:20,nodesep:50,rankdir:"tb"},graphAttrs=["acyclicer","ranker","rankdir","align"],nodeNumAttrs=["width","height"],nodeDefaults={width:0,height:0},edgeNumAttrs=["minlen","weight","width","height","labeloffset"],edgeDefaults={minlen:1,weight:1,width:0,height:0,labeloffset:10,labelpos:"r"},edgeAttrs=["labelpos"];function buildLayoutGraph(inputGraph){var g=new Graph({multigraph:true,compound:true}),graph=canonicalize(inputGraph.graph());g.setGraph(_.merge({},graphDefaults,selectNumberAttrs(graph,graphNumAttrs),_.pick(graph,graphAttrs))); +module.exports={graphlib:require("./lib/graphlib"),dagre:require("./lib/dagre"),intersect:require("./lib/intersect"),render:require("./lib/render"),util:require("./lib/util"),version:require("./lib/version")}},{"./lib/dagre":8,"./lib/graphlib":9,"./lib/intersect":10,"./lib/render":23,"./lib/util":25,"./lib/version":26}],2:[function(require,module,exports){var util=require("./util");module.exports={"default":normal,normal:normal,vee:vee,undirected:undirected};function normal(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function vee(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 L 4 5 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function undirected(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 5 L 10 5").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}},{"./util":25}],3:[function(require,module,exports){var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util");module.exports=createClusters;function createClusters(selection,g){var clusters=g.nodes().filter(function(v){return util.isSubgraph(g,v)}),svgClusters=selection.selectAll("g.cluster").data(clusters,function(v){return v});var 
makeClusterIdentifier=function(v){return"cluster_"+v.replace(/^cluster/,"")};svgClusters.enter().append("g").attr("id",makeClusterIdentifier).attr("name",function(v){return g.node(v).label}).classed("cluster",true).style("opacity",0).append("rect");var sortedClusters=util.orderByRank(g,svgClusters.data());for(var i=0;i<sortedClusters.length;i++){var v=sortedClusters[i];var node=g.node(v);if(node.label){var thisGroup=selection.select("g.cluster#"+makeClusterIdentifier(v));labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),bbox=_.pick(labelDom.node().getBBox(),"width","height");node.paddingTop+=bbox.height;node.paddingTop+=util.getMaxChildPaddingTop(g,v)}}util.applyTransition(svgClusters.exit(),g).style("opacity",0).remove();util.applyTransition(svgClusters,g).style("opacity",1);util.applyTransition(svgClusters.selectAll("rect"),g).attr("width",function(v){var node=g.node(v);return node.width+node.paddingLeft+node.paddingRight}).attr("height",function(v){var node=g.node(v);return node.height+node.paddingTop+node.paddingBottom}).attr("x",function(v){var node=g.node(v);return node.x-node.width/2-node.paddingLeft}).attr("y",function(v){var node=g.node(v);return node.y-node.height/2-node.paddingTop});svgClusters.each(function(){var cluster=d3.select(this),label=cluster.select("g.label"),rect=cluster.select("rect"),bbox=label.node().getBBox(),labelW=bbox.width,labelH=bbox.height;var num=function(x){return parseFloat(x.toString().replace(/px$/,""))};var labelX=num(rect.attr("x"))+num(rect.attr("width"))-labelH/2-labelW/2;var labelY=num(rect.attr("y"))+labelH;label.attr("transform","translate("+labelX+","+labelY+")")})}},{"./label/add-label":18,"./lodash":20,"./util":25}],4:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createEdgeLabels;function createEdgeLabels(selection,g){var svgEdgeLabels=selection.selectAll("g.edgeLabel").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);svgEdgeLabels.selectAll("*").remove();svgEdgeLabels.enter().append("g").classed("edgeLabel",true).style("opacity",0);svgEdgeLabels.each(function(e){var edge=g.edge(e),label=addLabel(d3.select(this),g.edge(e),0,0).classed("label",true),bbox=label.node().getBBox();if(edge.labelId){label.attr("id",edge.labelId)}if(!_.has(edge,"width")){edge.width=bbox.width}if(!_.has(edge,"height")){edge.height=bbox.height}});util.applyTransition(svgEdgeLabels.exit(),g).style("opacity",0).remove();return svgEdgeLabels}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],5:[function(require,module,exports){"use strict";var _=require("./lodash"),intersectNode=require("./intersect/intersect-node"),util=require("./util"),d3=require("./d3");module.exports=createEdgePaths;function createEdgePaths(selection,g,arrows){var svgPaths=selection.selectAll("g.edgePath").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);enter(svgPaths,g);exit(svgPaths,g);util.applyTransition(svgPaths,g).style("opacity",1);svgPaths.each(function(e){var domEdge=d3.select(this);var edge=g.edge(e);edge.elem=this;if(edge.id){domEdge.attr("id",edge.id)}util.applyClass(domEdge,edge["class"],(domEdge.classed("update")?"update ":"")+"edgePath")});svgPaths.selectAll("path.path").each(function(e){var edge=g.edge(e);edge.arrowheadId=_.uniqueId("arrowhead");var 
domEdge=d3.select(this).attr("marker-end",function(){return"url(#"+edge.arrowheadId+")"}).style("fill","none");util.applyTransition(domEdge,g).attr("d",function(e){return calcPoints(g,e)});util.applyStyle(domEdge,edge.style)});svgPaths.selectAll("defs *").remove();svgPaths.selectAll("defs").each(function(e){var edge=g.edge(e),arrowhead=arrows[edge.arrowhead];arrowhead(d3.select(this),edge.arrowheadId,edge,"arrowhead")});return svgPaths}function calcPoints(g,e){var edge=g.edge(e),tail=g.node(e.v),head=g.node(e.w),points=edge.points.slice(1,edge.points.length-1);points.unshift(intersectNode(tail,points[0]));points.push(intersectNode(head,points[points.length-1]));return createLine(edge,points)}function createLine(edge,points){var line=d3.svg.line().x(function(d){return d.x}).y(function(d){return d.y});if(_.has(edge,"lineInterpolate")){line.interpolate(edge.lineInterpolate)}if(_.has(edge,"lineTension")){line.tension(Number(edge.lineTension))}return line(points)}function getCoords(elem){var bbox=elem.getBBox(),matrix=elem.getTransformToElement(elem.ownerSVGElement).translate(bbox.width/2,bbox.height/2);return{x:matrix.e,y:matrix.f}}function enter(svgPaths,g){var svgPathsEnter=svgPaths.enter().append("g").attr("class","edgePath").style("opacity",0);svgPathsEnter.append("path").attr("class","path").attr("d",function(e){var edge=g.edge(e),sourceElem=g.node(e.v).elem,points=_.range(edge.points.length).map(function(){return getCoords(sourceElem)});return createLine(edge,points)});svgPathsEnter.append("defs")}function exit(svgPaths,g){var svgPathExit=svgPaths.exit();util.applyTransition(svgPathExit,g).style("opacity",0).remove();util.applyTransition(svgPathExit.select("path.path"),g).attr("d",function(e){var source=g.node(e.v);if(source){var points=_.range(this.pathSegList.length).map(function(){return source});return createLine({},points)}else{return d3.select(this).attr("d")}})}},{"./d3":7,"./intersect/intersect-node":14,"./lodash":20,"./util":25}],6:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createNodes;function createNodes(selection,g,shapes){var simpleNodes=g.nodes().filter(function(v){return!util.isSubgraph(g,v)});var svgNodes=selection.selectAll("g.node").data(simpleNodes,function(v){return v}).classed("update",true);svgNodes.selectAll("*").remove();svgNodes.enter().append("g").attr("id",function(v){return"node_"+v}).attr("name",function(v){return g.node(v).label}).classed("node",true).style("opacity",0);svgNodes.each(function(v){var node=g.node(v),thisGroup=d3.select(this),labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),shape=shapes[node.shape],bbox=_.pick(labelDom.node().getBBox(),"width","height");node.elem=this;if(node.id){thisGroup.attr("id",node.id)}if(node.labelId){labelGroup.attr("id",node.labelId)}util.applyClass(thisGroup,node["class"],(thisGroup.classed("update")?"update ":"")+"node");if(_.has(node,"width")){bbox.width=node.width}if(_.has(node,"height")){bbox.height=node.height}bbox.width+=node.paddingLeft+node.paddingRight;bbox.height+=node.paddingTop+node.paddingBottom;labelGroup.attr("transform","translate("+(node.paddingLeft-node.paddingRight)/2+","+(node.paddingTop-node.paddingBottom)/2+")");var shapeSvg=shape(d3.select(this),bbox,node);util.applyStyle(shapeSvg,node.style);var requiredWidth=0,requiredHeight=0;var nextNode=g.node(g.parent(v));while(nextNode){var tempGroup=thisGroup.append("g");var 
tempLabel=addLabel(tempGroup,nextNode);var tempBBox=tempLabel.node().getBBox();tempBBox.width-=50;requiredWidth=Math.max(requiredWidth,tempBBox.width);requiredHeight=Math.max(requiredHeight,tempBBox.height);tempLabel.remove();nextNode=g.node(g.parent(nextNode.label))}var shapeBBox=shapeSvg.node().getBBox();shapeBBox.width=Math.max(shapeBBox.width,requiredWidth);shapeBBox.height=Math.max(shapeBBox.height,requiredHeight);node.width=shapeBBox.width;node.height=shapeBBox.height});util.applyTransition(svgNodes.exit(),g).style("opacity",0).remove();return svgNodes}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],7:[function(require,module,exports){module.exports=window.d3},{}],8:[function(require,module,exports){var dagre;if(require){try{dagre=require("dagre")}catch(e){}}if(!dagre){dagre=window.dagre}module.exports=dagre},{dagre:27}],9:[function(require,module,exports){var graphlib;if(require){try{graphlib=require("graphlib")}catch(e){}}if(!graphlib){graphlib=window.graphlib}module.exports=graphlib},{graphlib:57}],10:[function(require,module,exports){module.exports={node:require("./intersect-node"),circle:require("./intersect-circle"),ellipse:require("./intersect-ellipse"),polygon:require("./intersect-polygon"),rect:require("./intersect-rect")}},{"./intersect-circle":11,"./intersect-ellipse":12,"./intersect-node":14,"./intersect-polygon":15,"./intersect-rect":16}],11:[function(require,module,exports){var intersectEllipse=require("./intersect-ellipse");module.exports=intersectCircle;function intersectCircle(node,rx,point){return intersectEllipse(node,rx,rx,point)}},{"./intersect-ellipse":12}],12:[function(require,module,exports){module.exports=intersectEllipse;function intersectEllipse(node,rx,ry,point){var cx=node.x;var cy=node.y;var px=cx-point.x;var py=cy-point.y;var det=Math.sqrt(rx*rx*py*py+ry*ry*px*px);var dx=Math.abs(rx*ry*px/det);if(point.x<cx){dx=-dx}var dy=Math.abs(rx*ry*py/det);if(point.y<cy){dy=-dy}return{x:cx+dx,y:cy+dy}}},{}],13:[function(require,module,exports){module.exports=intersectLine;function intersectLine(p1,p2,q1,q2){var a1,a2,b1,b2,c1,c2;var r1,r2,r3,r4;var denom,offset,num;var x,y;a1=p2.y-p1.y;b1=p1.x-p2.x;c1=p2.x*p1.y-p1.x*p2.y;r3=a1*q1.x+b1*q1.y+c1;r4=a1*q2.x+b1*q2.y+c1;if(r3!==0&&r4!==0&&sameSign(r3,r4)){return}a2=q2.y-q1.y;b2=q1.x-q2.x;c2=q2.x*q1.y-q1.x*q2.y;r1=a2*p1.x+b2*p1.y+c2;r2=a2*p2.x+b2*p2.y+c2;if(r1!==0&&r2!==0&&sameSign(r1,r2)){return}denom=a1*b2-a2*b1;if(denom===0){return}offset=Math.abs(denom/2);num=b1*c2-b2*c1;x=num<0?(num-offset)/denom:(num+offset)/denom;num=a2*c1-a1*c2;y=num<0?(num-offset)/denom:(num+offset)/denom;return{x:x,y:y}}function sameSign(r1,r2){return r1*r2>0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i<polyPoints.length;i++){var p1=polyPoints[i];var p2=polyPoints[i<polyPoints.length-1?i+1:0];var intersect=intersectLine(node,point,{x:left+p1.x,y:top+p1.y},{x:left+p2.x,y:top+p2.y});if(intersect){intersections.push(intersect)}}if(!intersections.length){console.log("NO INTERSECTION FOUND, RETURN NODE 
CENTER",node);return node}if(intersections.length>1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distp<distq?-1:distp===distq?0:1})}return intersections[0]}},{"./intersect-line":13}],16:[function(require,module,exports){module.exports=intersectRect;function intersectRect(node,point){var x=node.x;var y=node.y;var dx=point.x-x;var dy=point.y-y;var w=node.width/2;var h=node.height/2;var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;i<lines.length;i++){domNode.append("tspan").attr("xml:space","preserve").attr("dy","1em").attr("x","1").text(lines[i])}util.applyStyle(domNode,node.labelStyle);return domNode}function processEscapeSequences(text){var newText="",escaped=false,ch;for(var i=0;i<text.length;++i){ch=text[i];if(escaped){switch(ch){case"n":newText+="\n";break;default:newText+=ch}escaped=false}else if(ch==="\\"){escaped=true}else{newText+=ch}}return newText}},{"../util":25}],20:[function(require,module,exports){var lodash;if(require){try{lodash=require("lodash")}catch(e){}}if(!lodash){lodash=window._}module.exports=lodash},{lodash:77}],21:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3"),_=require("./lodash");module.exports=positionEdgeLabels;function positionEdgeLabels(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(e){var edge=g.edge(e);return _.has(edge,"x")?"translate("+edge.x+","+edge.y+")":""}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./lodash":20,"./util":25}],22:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3");module.exports=positionNodes;function positionNodes(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(v){var 
node=g.node(v);return"translate("+node.x+","+node.y+")"}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./util":25}],23:[function(require,module,exports){var _=require("./lodash"),layout=require("./dagre").layout;module.exports=render;function render(){var createNodes=require("./create-nodes"),createClusters=require("./create-clusters"),createEdgeLabels=require("./create-edge-labels"),createEdgePaths=require("./create-edge-paths"),positionNodes=require("./position-nodes"),positionEdgeLabels=require("./position-edge-labels"),shapes=require("./shapes"),arrows=require("./arrows");var fn=function(svg,g){preProcessGraph(g);var outputGroup=createOrSelectGroup(svg,"output"),clustersGroup=createOrSelectGroup(outputGroup,"clusters"),edgePathsGroup=createOrSelectGroup(outputGroup,"edgePaths"),edgeLabels=createEdgeLabels(createOrSelectGroup(outputGroup,"edgeLabels"),g),nodes=createNodes(createOrSelectGroup(outputGroup,"nodes"),g,shapes);layout(g);positionNodes(nodes,g);positionEdgeLabels(edgeLabels,g);createEdgePaths(edgePathsGroup,g,arrows);createClusters(clustersGroup,g);postProcessGraph(g)};fn.createNodes=function(value){if(!arguments.length)return createNodes;createNodes=value;return fn};fn.createClusters=function(value){if(!arguments.length)return createClusters;createClusters=value;return fn};fn.createEdgeLabels=function(value){if(!arguments.length)return createEdgeLabels;createEdgeLabels=value;return fn};fn.createEdgePaths=function(value){if(!arguments.length)return createEdgePaths;createEdgePaths=value;return fn};fn.shapes=function(value){if(!arguments.length)return shapes;shapes=value;return fn};fn.arrows=function(value){if(!arguments.length)return arrows;arrows=value;return fn};return fn}var NODE_DEFAULT_ATTRS={paddingLeft:0,paddingRight:0,paddingTop:0,paddingBottom:0,rx:0,ry:0,shape:"rect"};var EDGE_DEFAULT_ATTRS={arrowhead:"normal",lineInterpolate:"linear"};function preProcessGraph(g){g.nodes().forEach(function(v){var node=g.node(v);if(!_.has(node,"label")){node.label=v}if(_.has(node,"paddingX")){_.defaults(node,{paddingLeft:node.paddingX,paddingRight:node.paddingX})}if(_.has(node,"paddingY")){_.defaults(node,{paddingTop:node.paddingY,paddingBottom:node.paddingY})}if(_.has(node,"padding")){_.defaults(node,{paddingLeft:node.padding,paddingRight:node.padding,paddingTop:node.padding,paddingBottom:node.padding})}if(_.has(node,"paddingLeft")){_.defaults(node,{paddingLeft:node.paddingLeft})}if(_.has(node,"paddingRight")){_.defaults(node,{paddingRight:node.paddingRight})}if(_.has(node,"paddingTop")){_.defaults(node,{paddingTop:node.paddingTop})}if(_.has(node,"paddingBottom")){_.defaults(node,{paddingBottom:node.paddingBottom})}_.defaults(node,NODE_DEFAULT_ATTRS);_.each(["paddingLeft","paddingRight","paddingTop","paddingBottom"],function(k){node[k]=Number(node[k])});if(_.has(node,"width")){node._prevWidth=node.width}if(_.has(node,"height")){node._prevHeight=node.height}});g.edges().forEach(function(e){var edge=g.edge(e);if(!_.has(edge,"label")){edge.label=""}_.defaults(edge,EDGE_DEFAULT_ATTRS)})}function postProcessGraph(g){_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"_prevWidth")){node.width=node._prevWidth}else{delete node.width}if(_.has(node,"_prevHeight")){node.height=node._prevHeight}else{delete node.height}delete node._prevWidth;delete node._prevHeight})}function createOrSelectGroup(root,name){var 
selection=root.select("g."+name);if(selection.empty()){selection=root.append("g").attr("class",name)}return selection}},{"./arrows":2,"./create-clusters":3,"./create-edge-labels":4,"./create-edge-paths":5,"./create-nodes":6,"./dagre":8,"./lodash":20,"./position-edge-labels":21,"./position-nodes":22,"./shapes":24}],24:[function(require,module,exports){"use strict";var intersectRect=require("./intersect/intersect-rect"),intersectEllipse=require("./intersect/intersect-ellipse"),intersectCircle=require("./intersect/intersect-circle"),intersectPolygon=require("./intersect/intersect-polygon");module.exports={rect:rect,ellipse:ellipse,circle:circle,diamond:diamond};function rect(parent,bbox,node){var shapeSvg=parent.insert("rect",":first-child").attr("rx",node.rx).attr("ry",node.ry).attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("width",bbox.width).attr("height",bbox.height);node.intersect=function(point){return intersectRect(node,point)};return shapeSvg}function ellipse(parent,bbox,node){var rx=bbox.width/2,ry=bbox.height/2,shapeSvg=parent.insert("ellipse",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("rx",rx).attr("ry",ry);node.intersect=function(point){return intersectEllipse(node,rx,ry,point)};return shapeSvg}function circle(parent,bbox,node){var r=Math.max(bbox.width,bbox.height)/2,shapeSvg=parent.insert("circle",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("r",r);node.intersect=function(point){return intersectCircle(node,r,point)};return shapeSvg}function diamond(parent,bbox,node){var w=bbox.width*Math.SQRT2/2,h=bbox.height*Math.SQRT2/2,points=[{x:0,y:-h},{x:-w,y:0},{x:0,y:h},{x:w,y:0}],shapeSvg=parent.insert("polygon",":first-child").attr("points",points.map(function(p){return p.x+","+p.y}).join(" "));node.intersect=function(p){return intersectPolygon(node,points,p)};return shapeSvg}},{"./intersect/intersect-circle":11,"./intersect/intersect-ellipse":12,"./intersect/intersect-polygon":15,"./intersect/intersect-rect":16}],25:[function(require,module,exports){var _=require("./lodash");module.exports={isSubgraph:isSubgraph,getMaxChildPaddingTop:getMaxChildPaddingTop,orderByRank:orderByRank,edgeToId:edgeToId,applyStyle:applyStyle,applyClass:applyClass,applyTransition:applyTransition};function isSubgraph(g,v){return!!g.children(v).length}function getMaxChildPaddingTop(g,v){var maxPadding=0;var children=g.children(v);for(var i=0;i<children.length;i++){var child=g.node(children[i]);if(child.paddingTop&&child.paddingTop>maxPadding){maxPadding=child.paddingTop}}return maxPadding}function getRank(g,v){var maxRank=0;var children=g.children(v);for(var i=0;i<children.length;i++){var thisRank=getRank(g,children[i])+1;if(thisRank>maxRank){maxRank=thisRank}}return maxRank}function orderByRank(g,nodes){return nodes.sort(function(x,y){return getRank(g,x)-getRank(g,y)})}function edgeToId(e){return escapeId(e.v)+":"+escapeId(e.w)+":"+escapeId(e.name)}var ID_DELIM=/:/g;function escapeId(str){return str?String(str).replace(ID_DELIM,"\\:"):""}function applyStyle(dom,styleFn){if(styleFn){dom.attr("style",styleFn)}}function applyClass(dom,classFn,otherClasses){if(classFn){dom.attr("class",classFn).attr("class",otherClasses+" "+dom.attr("class"))}}function applyTransition(selection,g){var graph=g.graph();if(_.isPlainObject(graph)){var transition=graph.transition;if(_.isFunction(transition)){return transition(selection)}}return 
selection}},{"./lodash":20}],26:[function(require,module,exports){module.exports="0.4.4-pre"},{}],27:[function(require,module,exports){module.exports={graphlib:require("./lib/graphlib"),layout:require("./lib/layout"),debug:require("./lib/debug"),util:{time:require("./lib/util").time,notime:require("./lib/util").notime},version:require("./lib/version")}},{"./lib/debug":32,"./lib/graphlib":33,"./lib/layout":35,"./lib/util":55,"./lib/version":56}],28:[function(require,module,exports){"use strict";var _=require("./lodash"),greedyFAS=require("./greedy-fas");module.exports={run:run,undo:undo};function run(g){var fas=g.graph().acyclicer==="greedy"?greedyFAS(g,weightFn(g)):dfsFAS(g);_.each(fas,function(e){var label=g.edge(e);g.removeEdge(e);label.forwardName=e.name;label.reversed=true;g.setEdge(e.w,e.v,label,_.uniqueId("rev"))});function weightFn(g){return function(e){return g.edge(e).weight}}}function dfsFAS(g){var fas=[],stack={},visited={};function dfs(v){if(_.has(visited,v)){return}visited[v]=true;stack[v]=true;_.each(g.outEdges(v),function(e){if(_.has(stack,e.w)){fas.push(e)}else{dfs(e.w)}});delete stack[v]}_.each(g.nodes(),dfs);return fas}function undo(g){_.each(g.edges(),function(e){var label=g.edge(e);if(label.reversed){g.removeEdge(e);var forwardName=label.forwardName;delete label.reversed;delete label.forwardName;g.setEdge(e.w,e.v,label,forwardName)}})}},{"./greedy-fas":34,"./lodash":36}],29:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports=addBorderSegments;function addBorderSegments(g){function dfs(v){var children=g.children(v),node=g.node(v);if(children.length){_.each(children,dfs)}if(_.has(node,"minRank")){node.borderLeft=[];node.borderRight=[];for(var rank=node.minRank,maxRank=node.maxRank+1;rank<maxRank;++rank){addBorderNode(g,"borderLeft","_bl",v,node,rank);addBorderNode(g,"borderRight","_br",v,node,rank)}}}_.each(g.children(),dfs)}function addBorderNode(g,prop,prefix,sg,sgNode,rank){var label={width:0,height:0,rank:rank},prev=sgNode[prop][rank-1],curr=util.addDummyNode(g,"border",label,prefix);sgNode[prop][rank]=curr;g.setParent(curr,sg);if(prev){g.setEdge(prev,curr,{weight:1})}}},{"./lodash":36,"./util":55}],30:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports={adjust:adjust,undo:undo};function adjust(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="lr"||rankDir==="rl"){swapWidthHeight(g)}}function undo(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="bt"||rankDir==="rl"){reverseY(g)}if(rankDir==="lr"||rankDir==="rl"){swapXY(g);swapWidthHeight(g)}}function swapWidthHeight(g){_.each(g.nodes(),function(v){swapWidthHeightOne(g.node(v))});_.each(g.edges(),function(e){swapWidthHeightOne(g.edge(e))})}function swapWidthHeightOne(attrs){var w=attrs.width;attrs.width=attrs.height;attrs.height=w}function reverseY(g){_.each(g.nodes(),function(v){reverseYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,reverseYOne);if(_.has(edge,"y")){reverseYOne(edge)}})}function reverseYOne(attrs){attrs.y=-attrs.y}function swapXY(g){_.each(g.nodes(),function(v){swapXYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,swapXYOne);if(_.has(edge,"x")){swapXYOne(edge)}})}function swapXYOne(attrs){var x=attrs.x;attrs.x=attrs.y;attrs.y=x}},{"./lodash":36}],31:[function(require,module,exports){module.exports=List;function List(){var 
sentinel={};sentinel._next=sentinel._prev=sentinel;this._sentinel=sentinel}List.prototype.dequeue=function(){var sentinel=this._sentinel,entry=sentinel._prev;if(entry!==sentinel){unlink(entry);return entry}};List.prototype.enqueue=function(entry){var sentinel=this._sentinel;if(entry._prev&&entry._next){unlink(entry)}entry._next=sentinel._next;sentinel._next._prev=entry;sentinel._next=entry;entry._prev=sentinel};List.prototype.toString=function(){var strs=[],sentinel=this._sentinel,curr=sentinel._prev;while(curr!==sentinel){strs.push(JSON.stringify(curr,filterOutLinks));curr=curr._prev}return"["+strs.join(", ")+"]"};function unlink(entry){entry._prev._next=entry._next;entry._next._prev=entry._prev;delete entry._next;delete entry._prev}function filterOutLinks(k,v){if(k!=="_next"&&k!=="_prev"){return v}}},{}],32:[function(require,module,exports){var _=require("./lodash"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports={debugOrdering:debugOrdering};function debugOrdering(g){var layerMatrix=util.buildLayerMatrix(g);var h=new Graph({compound:true,multigraph:true}).setGraph({});_.each(g.nodes(),function(v){h.setNode(v,{label:v});h.setParent(v,"layer"+g.node(v).rank)});_.each(g.edges(),function(e){h.setEdge(e.v,e.w,{},e.name)});_.each(layerMatrix,function(layer,i){var layerV="layer"+i;h.setNode(layerV,{rank:"same"});_.reduce(layer,function(u,v){h.setEdge(u,v,{style:"invis"});return v})});return h}},{"./graphlib":33,"./lodash":36,"./util":55}],33:[function(require,module,exports){module.exports=require(9)},{"/Users/andrew/Documents/dev/dagre-d3/lib/graphlib.js":9,graphlib:57}],34:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graphlib").Graph,List=require("./data/list");module.exports=greedyFAS;var DEFAULT_WEIGHT_FN=_.constant(1);function greedyFAS(g,weightFn){if(g.nodeCount()<=1){return[]}var state=buildState(g,weightFn||DEFAULT_WEIGHT_FN);var results=doGreedyFAS(state.graph,state.buckets,state.zeroIdx);return _.flatten(_.map(results,function(e){return g.outEdges(e.v,e.w)}),true)}function doGreedyFAS(g,buckets,zeroIdx){var results=[],sources=buckets[buckets.length-1],sinks=buckets[0];var entry;while(g.nodeCount()){while(entry=sinks.dequeue()){removeNode(g,buckets,zeroIdx,entry)}while(entry=sources.dequeue()){removeNode(g,buckets,zeroIdx,entry)}if(g.nodeCount()){for(var i=buckets.length-2;i>0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var 
zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){ +parentDummyChains(g)});time(" addBorderSegments",function(){addBorderSegments(g)});time(" order",function(){order(g)});time(" insertSelfEdges",function(){insertSelfEdges(g)});time(" adjustCoordinateSystem",function(){coordinateSystem.adjust(g)});time(" position",function(){position(g)});time(" positionSelfEdges",function(){positionSelfEdges(g)});time(" removeBorderNodes",function(){removeBorderNodes(g)});time(" normalize.undo",function(){normalize.undo(g)});time(" fixupEdgeLabelCoords",function(){fixupEdgeLabelCoords(g)});time(" undoCoordinateSystem",function(){coordinateSystem.undo(g)});time(" translateGraph",function(){translateGraph(g)});time(" assignNodeIntersects",function(){assignNodeIntersects(g)});time(" reversePoints",function(){reversePointsForReversedEdges(g)});time(" acyclic.undo",function(){acyclic.undo(g)})}function updateInputGraph(inputGraph,layoutGraph){_.each(inputGraph.nodes(),function(v){var inputLabel=inputGraph.node(v),layoutLabel=layoutGraph.node(v);if(inputLabel){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y;if(layoutGraph.children(v).length){inputLabel.width=layoutLabel.width;inputLabel.height=layoutLabel.height}}});_.each(inputGraph.edges(),function(e){var 
inputLabel=inputGraph.edge(e),layoutLabel=layoutGraph.edge(e);inputLabel.points=layoutLabel.points;if(_.has(layoutLabel,"x")){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y}});inputGraph.graph().width=layoutGraph.graph().width;inputGraph.graph().height=layoutGraph.graph().height}var graphNumAttrs=["nodesep","edgesep","ranksep","marginx","marginy"],graphDefaults={ranksep:50,edgesep:20,nodesep:50,rankdir:"tb"},graphAttrs=["acyclicer","ranker","rankdir","align"],nodeNumAttrs=["width","height"],nodeDefaults={width:0,height:0},edgeNumAttrs=["minlen","weight","width","height","labeloffset"],edgeDefaults={minlen:1,weight:1,width:0,height:0,labeloffset:10,labelpos:"r"},edgeAttrs=["labelpos"];function buildLayoutGraph(inputGraph){var g=new Graph({multigraph:true,compound:true}),graph=canonicalize(inputGraph.graph());g.setGraph(_.merge({},graphDefaults,selectNumberAttrs(graph,graphNumAttrs),_.pick(graph,graphAttrs)));_.each(inputGraph.nodes(),function(v){var node=canonicalize(inputGraph.node(v));g.setNode(v,_.defaults(selectNumberAttrs(node,nodeNumAttrs),nodeDefaults));g.setParent(v,inputGraph.parent(v))});_.each(inputGraph.edges(),function(e){var edge=canonicalize(inputGraph.edge(e));g.setEdge(e,_.merge({},edgeDefaults,selectNumberAttrs(edge,edgeNumAttrs),_.pick(edge,edgeAttrs)))});return g}function makeSpaceForEdgeLabels(g){var graph=g.graph();graph.ranksep/=2;_.each(g.edges(),function(e){var edge=g.edge(e);edge.minlen*=2;if(edge.labelpos.toLowerCase()!=="c"){if(graph.rankdir==="TB"||graph.rankdir==="BT"){edge.width+=edge.labeloffset}else{edge.height+=edge.labeloffset}}})}function injectEdgeLabelProxies(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.width&&edge.height){var v=g.node(e.v),w=g.node(e.w),label={rank:(w.rank-v.rank)/2+v.rank,e:e};util.addDummyNode(g,"edge-proxy",label,"_ep")}})}function assignRankMinMax(g){var maxRank=0;_.each(g.nodes(),function(v){var node=g.node(v);if(node.borderTop){node.minRank=g.node(node.borderTop).rank;node.maxRank=g.node(node.borderBottom).rank;maxRank=_.max(maxRank,node.maxRank)}});g.graph().maxRank=maxRank}function removeEdgeLabelProxies(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="edge-proxy"){g.edge(node.e).labelRank=node.rank;g.removeNode(v)}})}function translateGraph(g){var minX=Number.POSITIVE_INFINITY,maxX=0,minY=Number.POSITIVE_INFINITY,maxY=0,graphLabel=g.graph(),marginX=graphLabel.marginx||0,marginY=graphLabel.marginy||0;function getExtremes(attrs){var x=attrs.x,y=attrs.y,w=attrs.width,h=attrs.height;minX=Math.min(minX,x-w/2);maxX=Math.max(maxX,x+w/2);minY=Math.min(minY,y-h/2);maxY=Math.max(maxY,y+h/2)}_.each(g.nodes(),function(v){getExtremes(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){getExtremes(edge)}});minX-=marginX;minY-=marginY;_.each(g.nodes(),function(v){var node=g.node(v);node.x-=minX;node.y-=minY});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,function(p){p.x-=minX;p.y-=minY});if(_.has(edge,"x")){edge.x-=minX}if(_.has(edge,"y")){edge.y-=minY}});graphLabel.width=maxX-minX+marginX;graphLabel.height=maxY-minY+marginY}function assignNodeIntersects(g){_.each(g.edges(),function(e){var edge=g.edge(e),nodeV=g.node(e.v),nodeW=g.node(e.w),p1,p2;if(!edge.points){edge.points=[];p1=nodeW;p2=nodeV}else{p1=edge.points[0];p2=edge.points[edge.points.length-1]}edge.points.unshift(util.intersectRect(nodeV,p1));edge.points.push(util.intersectRect(nodeW,p2))})}function fixupEdgeLabelCoords(g){_.each(g.edges(),function(e){var 
edge=g.edge(e);if(_.has(edge,"x")){if(edge.labelpos==="l"||edge.labelpos==="r"){edge.width-=edge.labeloffset}switch(edge.labelpos){case"l":edge.x-=edge.width/2+edge.labeloffset;break;case"r":edge.x+=edge.width/2+edge.labeloffset;break}}})}function reversePointsForReversedEdges(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.reversed){edge.points.reverse()}})}function removeBorderNodes(g){_.each(g.nodes(),function(v){if(g.children(v).length){var node=g.node(v),t=g.node(node.borderTop),b=g.node(node.borderBottom),l=g.node(_.last(node.borderLeft)),r=g.node(_.last(node.borderRight));node.width=Math.abs(r.x-l.x);node.height=Math.abs(b.y-t.y);node.x=l.x+node.width/2;node.y=t.y+node.height/2}});_.each(g.nodes(),function(v){if(g.node(v).dummy==="border"){g.removeNode(v)}})}function removeSelfEdges(g){_.each(g.edges(),function(e){if(e.v===e.w){var node=g.node(e.v);if(!node.selfEdges){node.selfEdges=[]}node.selfEdges.push({e:e,label:g.edge(e)});g.removeEdge(e)}})}function insertSelfEdges(g){var layers=util.buildLayerMatrix(g);_.each(layers,function(layer){var orderShift=0;_.each(layer,function(v,i){var node=g.node(v);node.order=i+orderShift;_.each(node.selfEdges,function(selfEdge){util.addDummyNode(g,"selfedge",{width:selfEdge.label.width,height:selfEdge.label.height,rank:node.rank,order:i+ ++orderShift,e:selfEdge.e,label:selfEdge.label},"_se")});delete node.selfEdges})})}function positionSelfEdges(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="selfedge"){var selfNode=g.node(node.e.v),x=selfNode.x+selfNode.width/2,y=selfNode.y,dx=node.x-x,dy=selfNode.height/2;g.setEdge(node.e,node.label);g.removeNode(v);node.label.points=[{x:x+2*dx/3,y:y-dy},{x:x+5*dx/6,y:y-dy},{x:x+dx,y:y},{x:x+5*dx/6,y:y+dy},{x:x+2*dx/3,y:y+dy}];node.label.x=node.x;node.label.y=node.y}})}function selectNumberAttrs(obj,attrs){return _.mapValues(_.pick(obj,attrs),Number)}function canonicalize(attrs){var newAttrs={};_.each(attrs,function(v,k){newAttrs[k.toLowerCase()]=v});return newAttrs}},{"./acyclic":28,"./add-border-segments":29,"./coordinate-system":30,"./graphlib":33,"./lodash":36,"./nesting-graph":37,"./normalize":38,"./order":43,"./parent-dummy-chains":48,"./position":50,"./rank":52,"./util":55}],36:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],37:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports={run:run,cleanup:cleanup};function run(g){var root=util.addDummyNode(g,"root",{},"_root"),depths=treeDepths(g),height=_.max(depths)-1,nodeSep=2*height+1;g.graph().nestingRoot=root;_.each(g.edges(),function(e){g.edge(e).minlen*=nodeSep});var weight=sumWeights(g)+1;_.each(g.children(),function(child){dfs(g,root,nodeSep,weight,height,depths,child)});g.graph().nodeRankFactor=nodeSep}function dfs(g,root,nodeSep,weight,height,depths,v){var children=g.children(v);if(!children.length){if(v!==root){g.setEdge(root,v,{weight:0,minlen:nodeSep})}return}var top=util.addBorderNode(g,"_bt"),bottom=util.addBorderNode(g,"_bb"),label=g.node(v);g.setParent(top,v);label.borderTop=top;g.setParent(bottom,v);label.borderBottom=bottom;_.each(children,function(child){dfs(g,root,nodeSep,weight,height,depths,child);var 
childNode=g.node(child),childTop=childNode.borderTop?childNode.borderTop:child,childBottom=childNode.borderBottom?childNode.borderBottom:child,thisWeight=childNode.borderTop?weight:2*weight,minlen=childTop!==childBottom?1:height-depths[v]+1;g.setEdge(top,childTop,{weight:thisWeight,minlen:minlen,nestingEdge:true});g.setEdge(childBottom,bottom,{weight:thisWeight,minlen:minlen,nestingEdge:true})});if(!g.parent(v)){g.setEdge(root,top,{weight:0,minlen:height+depths[v]})}}function treeDepths(g){var depths={};function dfs(v,depth){var children=g.children(v);if(children&&children.length){_.each(children,function(child){dfs(child,depth+1)})}depths[v]=depth}_.each(g.children(),function(v){dfs(v,1)});return depths}function sumWeights(g){return _.reduce(g.edges(),function(acc,e){return acc+g.edge(e).weight},0)}function cleanup(g){var graphLabel=g.graph();g.removeNode(graphLabel.nestingRoot);delete graphLabel.nestingRoot;_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.nestingEdge){g.removeEdge(e)}})}},{"./lodash":36,"./util":55}],38:[function(require,module,exports){"use strict";var _=require("./lodash"),util=require("./util");module.exports={run:run,undo:undo};function run(g){g.graph().dummyChains=[];_.each(g.edges(),function(edge){normalizeEdge(g,edge)})}function normalizeEdge(g,e){var v=e.v,vRank=g.node(v).rank,w=e.w,wRank=g.node(w).rank,name=e.name,edgeLabel=g.edge(e),labelRank=edgeLabel.labelRank;if(wRank===vRank+1)return;g.removeEdge(e);var dummy,attrs,i;for(i=0,++vRank;vRank<wRank;++i,++vRank){edgeLabel.points=[];attrs={width:0,height:0,edgeLabel:edgeLabel,edgeObj:e,rank:vRank};dummy=util.addDummyNode(g,"edge",attrs,"_d");if(vRank===labelRank){attrs.width=edgeLabel.width;attrs.height=edgeLabel.height;attrs.dummy="edge-label";attrs.labelpos=edgeLabel.labelpos}g.setEdge(v,dummy,{weight:edgeLabel.weight},name);if(i===0){g.graph().dummyChains.push(dummy)}v=dummy}g.setEdge(v,w,{weight:edgeLabel.weight},name)}function undo(g){_.each(g.graph().dummyChains,function(v){var node=g.node(v),origLabel=node.edgeLabel,w;g.setEdge(node.edgeObj,origLabel);while(node.dummy){w=g.successors(v)[0];g.removeNode(v);origLabel.points.push({x:node.x,y:node.y});if(node.dummy==="edge-label"){origLabel.x=node.x;origLabel.y=node.y;origLabel.width=node.width;origLabel.height=node.height}v=w;node=g.node(v)}})}},{"./lodash":36,"./util":55}],39:[function(require,module,exports){var _=require("../lodash");module.exports=addSubgraphConstraints;function addSubgraphConstraints(g,cg,vs){var prev={},rootPrev;_.each(vs,function(v){var child=g.parent(v),parent,prevChild;while(child){parent=g.parent(child);if(parent){prevChild=prev[parent];prev[parent]=child}else{prevChild=rootPrev;rootPrev=child}if(prevChild&&prevChild!==child){cg.setEdge(prevChild,child);return}child=parent}})}},{"../lodash":36}],40:[function(require,module,exports){var _=require("../lodash");module.exports=barycenter;function barycenter(g,movable){return _.map(movable,function(v){var inV=g.inEdges(v);if(!inV.length){return{v:v}}else{var result=_.reduce(inV,function(acc,e){var edge=g.edge(e),nodeU=g.node(e.v);return{sum:acc.sum+edge.weight*nodeU.order,weight:acc.weight+edge.weight}},{sum:0,weight:0});return{v:v,barycenter:result.sum/result.weight,weight:result.weight}}})}},{"../lodash":36}],41:[function(require,module,exports){var _=require("../lodash"),Graph=require("../graphlib").Graph;module.exports=buildLayerGraph;function buildLayerGraph(g,rank,relationship){var root=createRootNode(g),result=new 
Graph({compound:true}).setGraph({root:root}).setDefaultNodeLabel(function(v){return g.node(v)});_.each(g.nodes(),function(v){var node=g.node(v),parent=g.parent(v);if(node.rank===rank||node.minRank<=rank&&rank<=node.maxRank){result.setNode(v);result.setParent(v,parent||root);_.each(g[relationship](v),function(e){var u=e.v===v?e.w:e.v,edge=result.edge(u,v),weight=!_.isUndefined(edge)?edge.weight:0;result.setEdge(u,v,{weight:g.edge(e).weight+weight})});if(_.has(node,"minRank")){result.setNode(v,{borderLeft:node.borderLeft[rank],borderRight:node.borderRight[rank]})}}});return result}function createRootNode(g){var v;while(g.hasNode(v=_.uniqueId("_root")));return v}},{"../graphlib":33,"../lodash":36}],42:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=crossCount;function crossCount(g,layering){var cc=0;for(var i=1;i<layering.length;++i){cc+=twoLayerCrossCount(g,layering[i-1],layering[i])}return cc}function twoLayerCrossCount(g,northLayer,southLayer){var southPos=_.zipObject(southLayer,_.map(southLayer,function(v,i){return i}));var southEntries=_.flatten(_.map(northLayer,function(v){return _.chain(g.outEdges(v)).map(function(e){return{pos:southPos[e.w],weight:g.edge(e).weight}}).sortBy("pos").value()}),true);var firstIndex=1;while(firstIndex<southLayer.length)firstIndex<<=1;var treeSize=2*firstIndex-1;firstIndex-=1;var tree=_.map(new Array(treeSize),function(){return 0});var cc=0;_.each(southEntries.forEach(function(entry){var index=entry.pos+firstIndex;tree[index]+=entry.weight;var weightSum=0;while(index>0){if(index%2){weightSum+=tree[index+1]}index=index-1>>1;tree[index]+=entry.weight}cc+=entry.weight*weightSum}));return cc}},{"../lodash":36}],43:[function(require,module,exports){"use strict";var _=require("../lodash"),initOrder=require("./init-order"),crossCount=require("./cross-count"),sortSubgraph=require("./sort-subgraph"),buildLayerGraph=require("./build-layer-graph"),addSubgraphConstraints=require("./add-subgraph-constraints"),Graph=require("../graphlib").Graph,util=require("../util");module.exports=order;function order(g){var maxRank=util.maxRank(g),downLayerGraphs=buildLayerGraphs(g,_.range(1,maxRank+1),"inEdges"),upLayerGraphs=buildLayerGraphs(g,_.range(maxRank-1,-1,-1),"outEdges");var layering=initOrder(g);assignOrder(g,layering);var bestCC=Number.POSITIVE_INFINITY,best;for(var i=0,lastBest=0;lastBest<4;++i,++lastBest){sweepLayerGraphs(i%2?downLayerGraphs:upLayerGraphs,i%4>=2);layering=util.buildLayerMatrix(g);var cc=crossCount(g,layering);if(cc<bestCC){lastBest=0;best=_.cloneDeep(layering);bestCC=cc}}assignOrder(g,best)}function buildLayerGraphs(g,ranks,relationship){return _.map(ranks,function(rank){return buildLayerGraph(g,rank,relationship)})}function sweepLayerGraphs(layerGraphs,biasRight){var cg=new Graph;_.each(layerGraphs,function(lg){var root=lg.graph().root;var sorted=sortSubgraph(lg,root,cg,biasRight);_.each(sorted.vs,function(v,i){lg.node(v).order=i});addSubgraphConstraints(lg,cg,sorted.vs)})}function assignOrder(g,layering){_.each(layering,function(layer){_.each(layer,function(v,i){g.node(v).order=i})})}},{"../graphlib":33,"../lodash":36,"../util":55,"./add-subgraph-constraints":39,"./build-layer-graph":41,"./cross-count":42,"./init-order":44,"./sort-subgraph":46}],44:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=initOrder;function initOrder(g){var visited={},simpleNodes=_.filter(g.nodes(),function(v){return!g.children(v).length}),maxRank=_.max(_.map(simpleNodes,function(v){return 
g.node(v).rank})),layers=_.map(_.range(maxRank+1),function(){return[]});function dfs(v){if(_.has(visited,v))return;visited[v]=true;var node=g.node(v);layers[node.rank].push(v);_.each(g.successors(v),dfs)}var orderedVs=_.sortBy(simpleNodes,function(v){return g.node(v).rank});_.each(orderedVs,dfs);return layers}},{"../lodash":36}],45:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=resolveConflicts;function resolveConflicts(entries,cg){var mappedEntries={};_.each(entries,function(entry,i){var tmp=mappedEntries[entry.v]={indegree:0,"in":[],out:[],vs:[entry.v],i:i};if(!_.isUndefined(entry.barycenter)){tmp.barycenter=entry.barycenter;tmp.weight=entry.weight}});_.each(cg.edges(),function(e){var entryV=mappedEntries[e.v],entryW=mappedEntries[e.w];if(!_.isUndefined(entryV)&&!_.isUndefined(entryW)){entryW.indegree++;entryV.out.push(mappedEntries[e.w])}});var sourceSet=_.filter(mappedEntries,function(entry){return!entry.indegree});return doResolveConflicts(sourceSet)}function doResolveConflicts(sourceSet){var entries=[];function handleIn(vEntry){return function(uEntry){if(uEntry.merged){return}if(_.isUndefined(uEntry.barycenter)||_.isUndefined(vEntry.barycenter)||uEntry.barycenter>=vEntry.barycenter){mergeEntries(vEntry,uEntry)}}}function handleOut(vEntry){return function(wEntry){wEntry["in"].push(vEntry);if(--wEntry.indegree===0){sourceSet.push(wEntry)}}}while(sourceSet.length){var entry=sourceSet.pop();entries.push(entry);_.each(entry["in"].reverse(),handleIn(entry));_.each(entry.out,handleOut(entry))}return _.chain(entries).filter(function(entry){return!entry.merged}).map(function(entry){return _.pick(entry,["vs","i","barycenter","weight"])}).value()}function mergeEntries(target,source){var sum=0,weight=0;if(target.weight){sum+=target.barycenter*target.weight;weight+=target.weight}if(source.weight){sum+=source.barycenter*source.weight;weight+=source.weight}target.vs=source.vs.concat(target.vs);target.barycenter=sum/weight;target.weight=weight;target.i=Math.min(source.i,target.i);source.merged=true}},{"../lodash":36}],46:[function(require,module,exports){var _=require("../lodash"),barycenter=require("./barycenter"),resolveConflicts=require("./resolve-conflicts"),sort=require("./sort");module.exports=sortSubgraph;function sortSubgraph(g,v,cg,biasRight){var movable=g.children(v),node=g.node(v),bl=node?node.borderLeft:undefined,br=node?node.borderRight:undefined,subgraphs={};if(bl){movable=_.filter(movable,function(w){return w!==bl&&w!==br})}var barycenters=barycenter(g,movable);_.each(barycenters,function(entry){if(g.children(entry.v).length){var subgraphResult=sortSubgraph(g,entry.v,cg,biasRight);subgraphs[entry.v]=subgraphResult;if(_.has(subgraphResult,"barycenter")){mergeBarycenters(entry,subgraphResult)}}});var entries=resolveConflicts(barycenters,cg);expandSubgraphs(entries,subgraphs);var result=sort(entries,biasRight);if(bl){result.vs=_.flatten([bl,result.vs,br],true);if(g.predecessors(bl).length){var blPred=g.node(g.predecessors(bl)[0]),brPred=g.node(g.predecessors(br)[0]);if(!_.has(result,"barycenter")){result.barycenter=0;result.weight=0}result.barycenter=(result.barycenter*result.weight+blPred.order+brPred.order)/(result.weight+2);result.weight+=2}}return result}function expandSubgraphs(entries,subgraphs){_.each(entries,function(entry){entry.vs=_.flatten(entry.vs.map(function(v){if(subgraphs[v]){return subgraphs[v].vs}return v}),true)})}function 
mergeBarycenters(target,other){if(!_.isUndefined(target.barycenter)){target.barycenter=(target.barycenter*target.weight+other.barycenter*other.weight)/(target.weight+other.weight);target.weight+=other.weight}else{target.barycenter=other.barycenter;target.weight=other.weight}}},{"../lodash":36,"./barycenter":40,"./resolve-conflicts":45,"./sort":47}],47:[function(require,module,exports){var _=require("../lodash"),util=require("../util");module.exports=sort;function sort(entries,biasRight){var parts=util.partition(entries,function(entry){return _.has(entry,"barycenter")});var sortable=parts.lhs,unsortable=_.sortBy(parts.rhs,function(entry){return-entry.i}),vs=[],sum=0,weight=0,vsIndex=0;sortable.sort(compareWithBias(!!biasRight));vsIndex=consumeUnsortable(vs,unsortable,vsIndex);_.each(sortable,function(entry){vsIndex+=entry.vs.length;vs.push(entry.vs);sum+=entry.barycenter*entry.weight;weight+=entry.weight;vsIndex=consumeUnsortable(vs,unsortable,vsIndex)});var result={vs:_.flatten(vs,true)};if(weight){result.barycenter=sum/weight;result.weight=weight}return result}function consumeUnsortable(vs,unsortable,index){var last;while(unsortable.length&&(last=_.last(unsortable)).i<=index){unsortable.pop();vs.push(last.vs);index++}return index}function compareWithBias(bias){return function(entryV,entryW){if(entryV.barycenter<entryW.barycenter){return-1}else if(entryV.barycenter>entryW.barycenter){return 1}return!bias?entryV.i-entryW.i:entryW.i-entryV.i}}},{"../lodash":36,"../util":55}],48:[function(require,module,exports){var _=require("./lodash");module.exports=parentDummyChains;function parentDummyChains(g){var postorderNums=postorder(g);_.each(g.graph().dummyChains,function(v){var node=g.node(v),edgeObj=node.edgeObj,pathData=findPath(g,postorderNums,edgeObj.v,edgeObj.w),path=pathData.path,lca=pathData.lca,pathIdx=0,pathV=path[pathIdx],ascending=true;while(v!==edgeObj.w){node=g.node(v);if(ascending){while((pathV=path[pathIdx])!==lca&&g.node(pathV).maxRank<node.rank){pathIdx++}if(pathV===lca){ascending=false}}if(!ascending){while(pathIdx<path.length-1&&g.node(pathV=path[pathIdx+1]).minRank<=node.rank){pathIdx++}pathV=path[pathIdx]}g.setParent(v,pathV);v=g.successors(v)[0]}})}function findPath(g,postorderNums,v,w){var vPath=[],wPath=[],low=Math.min(postorderNums[v].low,postorderNums[w].low),lim=Math.max(postorderNums[v].lim,postorderNums[w].lim),parent,lca;parent=v;do{parent=g.parent(parent);vPath.push(parent)}while(parent&&(postorderNums[parent].low>low||lim>postorderNums[parent].lim));lca=parent;parent=w;while((parent=g.parent(parent))!==lca){wPath.push(parent)}return{path:vPath.concat(wPath.reverse()),lca:lca}}function postorder(g){var result={},lim=0;function dfs(v){var low=lim;_.each(g.children(v),dfs);result[v]={low:low,lim:lim++}}_.each(g.children(),dfs);return result}},{"./lodash":36}],49:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,util=require("../util");module.exports={positionX:positionX,findType1Conflicts:findType1Conflicts,findType2Conflicts:findType2Conflicts,addConflict:addConflict,hasConflict:hasConflict,verticalAlignment:verticalAlignment,horizontalCompaction:horizontalCompaction,alignCoordinates:alignCoordinates,findSmallestWidthAlignment:findSmallestWidthAlignment,balance:balance};function findType1Conflicts(g,layering){var conflicts={};function visitLayer(prevLayer,layer){var k0=0,scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.each(layer,function(v,i){var 
w=findOtherInnerSegmentNode(g,v),k1=w?g.node(w).order:prevLayerLength;if(w||v===lastNode){_.each(layer.slice(scanPos,i+1),function(scanNode){_.each(g.predecessors(scanNode),function(u){var uLabel=g.node(u),uPos=uLabel.order;if((uPos<k0||k1<uPos)&&!(uLabel.dummy&&g.node(scanNode).dummy)){addConflict(conflicts,u,scanNode)}})});scanPos=i+1;k0=k1}});return layer}_.reduce(layering,visitLayer);return conflicts}function findType2Conflicts(g,layering){var conflicts={};function scan(south,southPos,southEnd,prevNorthBorder,nextNorthBorder){var v;_.each(_.range(southPos,southEnd),function(i){v=south[i];if(g.node(v).dummy){_.each(g.predecessors(v),function(u){var uNode=g.node(u);if(uNode.dummy&&(uNode.order<prevNorthBorder||uNode.order>nextNorthBorder)){addConflict(conflicts,u,v)}})}})}function visitLayer(north,south){var prevNorthPos=-1,nextNorthPos,southPos=0;_.each(south,function(v,southLookahead){if(g.node(v).dummy==="border"){var predecessors=g.predecessors(v);if(predecessors.length){nextNorthPos=g.node(predecessors[0]).order;scan(south,southPos,southLookahead,prevNorthPos,nextNorthPos);southPos=southLookahead;prevNorthPos=nextNorthPos}}scan(south,southPos,south.length,nextNorthPos,north.length)});return south}_.reduce(layering,visitLayer);return conflicts}function findOtherInnerSegmentNode(g,v){if(g.node(v).dummy){return _.find(g.predecessors(v),function(u){return g.node(u).dummy})}}function addConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}var conflictsV=conflicts[v];if(!conflictsV){conflicts[v]=conflictsV={}}conflictsV[w]=true}function hasConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}return _.has(conflicts[v],w)}function verticalAlignment(g,layering,conflicts,neighborFn){var root={},align={},pos={};_.each(layering,function(layer){_.each(layer,function(v,order){root[v]=v;align[v]=v;pos[v]=order})});_.each(layering,function(layer){var prevIdx=-1;_.each(layer,function(v){var ws=neighborFn(v);if(ws.length){ws=_.sortBy(ws,function(w){return pos[w]});var mp=(ws.length-1)/2;for(var i=Math.floor(mp),il=Math.ceil(mp);i<=il;++i){var w=ws[i];if(align[v]===v&&prevIdx<pos[w]&&!hasConflict(conflicts,v,w)){align[w]=v;align[v]=root[v]=root[w];prevIdx=pos[w]}}}})});return{root:root,align:align}}function horizontalCompaction(g,layering,root,align,reverseSep){var xs={},blockG=buildBlockGraph(g,layering,root,reverseSep);var visited={};function pass1(v){if(!_.has(visited,v)){visited[v]=true;xs[v]=_.reduce(blockG.inEdges(v),function(max,e){pass1(e.v);return Math.max(max,xs[e.v]+blockG.edge(e))},0)}}_.each(blockG.nodes(),pass1);function pass2(v){if(visited[v]!==2){visited[v]++;var min=_.reduce(blockG.outEdges(v),function(min,e){pass2(e.w);return Math.min(min,xs[e.w]-blockG.edge(e))},Number.POSITIVE_INFINITY);if(min!==Number.POSITIVE_INFINITY){xs[v]=Math.max(xs[v],min)}}}_.each(blockG.nodes(),pass2);_.each(align,function(v){xs[v]=xs[root[v]]});return xs}function buildBlockGraph(g,layering,root,reverseSep){var blockGraph=new Graph,graphLabel=g.graph(),sepFn=sep(graphLabel.nodesep,graphLabel.edgesep,reverseSep);_.each(layering,function(layer){var u;_.each(layer,function(v){var vRoot=root[v];blockGraph.setNode(vRoot);if(u){var uRoot=root[u],prevMax=blockGraph.edge(uRoot,vRoot);blockGraph.setEdge(uRoot,vRoot,Math.max(sepFn(g,v,u),prevMax||0))}u=v})});return blockGraph}function findSmallestWidthAlignment(g,xss){return _.min(xss,function(xs){var min=_.min(xs,function(x,v){return x-width(g,v)/2}),max=_.max(xs,function(x,v){return x+width(g,v)/2});return max-min})}function alignCoordinates(xss,alignTo){var 
alignToMin=_.min(alignTo),alignToMax=_.max(alignTo);_.each(["u","d"],function(vert){_.each(["l","r"],function(horiz){var alignment=vert+horiz,xs=xss[alignment],delta;if(xs===alignTo)return;delta=horiz==="l"?alignToMin-_.min(xs):alignToMax-_.max(xs);if(delta){xss[alignment]=_.mapValues(xs,function(x){return x+delta})}})})}function balance(xss,align){return _.mapValues(xss.ul,function(ignore,v){if(align){return xss[align.toLowerCase()][v]}else{var xs=_.sortBy(_.pluck(xss,v));return(xs[1]+xs[2])/2}})}function positionX(g){var layering=util.buildLayerMatrix(g),conflicts=_.merge(findType1Conflicts(g,layering),findType2Conflicts(g,layering));var xss={},adjustedLayering;_.each(["u","d"],function(vert){adjustedLayering=vert==="u"?layering:_.values(layering).reverse();_.each(["l","r"],function(horiz){if(horiz==="r"){adjustedLayering=_.map(adjustedLayering,function(inner){return _.values(inner).reverse()})}var neighborFn=_.bind(vert==="u"?g.predecessors:g.successors,g);var align=verticalAlignment(g,adjustedLayering,conflicts,neighborFn);var xs=horizontalCompaction(g,adjustedLayering,align.root,align.align,horiz==="r");if(horiz==="r"){xs=_.mapValues(xs,function(x){return-x})}xss[vert+horiz]=xs})});var smallestWidth=findSmallestWidthAlignment(g,xss);alignCoordinates(xss,smallestWidth);return balance(xss,g.graph().align)}function sep(nodeSep,edgeSep,reverseSep){return function(g,v,w){var vLabel=g.node(v),wLabel=g.node(w),sum=0,delta;sum+=vLabel.width/2;if(_.has(vLabel,"labelpos")){switch(vLabel.labelpos.toLowerCase()){case"l":delta=-vLabel.width/2;break;case"r":delta=vLabel.width/2;break}}if(delta){sum+=reverseSep?delta:-delta}delta=0;sum+=(vLabel.dummy?edgeSep:nodeSep)/2;sum+=(wLabel.dummy?edgeSep:nodeSep)/2;sum+=wLabel.width/2;if(_.has(wLabel,"labelpos")){switch(wLabel.labelpos.toLowerCase()){case"l":delta=wLabel.width/2;break;case"r":delta=-wLabel.width/2;break}}if(delta){sum+=reverseSep?delta:-delta}delta=0;return sum}}function width(g,v){return g.node(v).width}},{"../graphlib":33,"../lodash":36,"../util":55}],50:[function(require,module,exports){"use strict";var _=require("../lodash"),util=require("../util"),positionX=require("./bk").positionX;module.exports=position;function position(g){g=util.asNonCompoundGraph(g);positionY(g);_.each(positionX(g),function(x,v){g.node(v).x=x})}function positionY(g){var layering=util.buildLayerMatrix(g),rankSep=g.graph().ranksep,prevY=0;_.each(layering,function(layer){var maxHeight=_.max(_.map(layer,function(v){return g.node(v).height}));_.each(layer,function(v){g.node(v).y=prevY+maxHeight/2});prevY+=maxHeight+rankSep})}},{"../lodash":36,"../util":55,"./bk":49}],51:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,slack=require("./util").slack;module.exports=feasibleTree;function feasibleTree(g){var t=new Graph({directed:false});var start=g.nodes()[0],size=g.nodeCount();t.setNode(start,{});var edge,delta;while(tightTree(t,g)<size){edge=findMinSlackEdge(t,g);delta=t.hasNode(edge.v)?slack(g,edge):-slack(g,edge);shiftRanks(t,g,delta)}return t}function tightTree(t,g){function dfs(v){_.each(g.nodeEdges(v),function(e){var edgeV=e.v,w=v===edgeV?e.w:edgeV;if(!t.hasNode(w)&&!slack(g,e)){t.setNode(w,{});t.setEdge(v,w,{});dfs(w)}})}_.each(t.nodes(),dfs);return t.nodeCount()}function findMinSlackEdge(t,g){return _.min(g.edges(),function(e){if(t.hasNode(e.v)!==t.hasNode(e.w)){return slack(g,e)}})}function 
shiftRanks(t,g,delta){_.each(t.nodes(),function(v){g.node(v).rank+=delta})}},{"../graphlib":33,"../lodash":36,"./util":54}],52:[function(require,module,exports){"use strict";var rankUtil=require("./util"),longestPath=rankUtil.longestPath,feasibleTree=require("./feasible-tree"),networkSimplex=require("./network-simplex");module.exports=rank;function rank(g){switch(g.graph().ranker){case"network-simplex":networkSimplexRanker(g);break;case"tight-tree":tightTreeRanker(g);break;case"longest-path":longestPathRanker(g);break;default:networkSimplexRanker(g)}}var longestPathRanker=longestPath;function tightTreeRanker(g){longestPath(g);feasibleTree(g)}function networkSimplexRanker(g){networkSimplex(g)}},{"./feasible-tree":51,"./network-simplex":53,"./util":54}],53:[function(require,module,exports){"use strict";var _=require("../lodash"),feasibleTree=require("./feasible-tree"),slack=require("./util").slack,initRank=require("./util").longestPath,preorder=require("../graphlib").alg.preorder,postorder=require("../graphlib").alg.postorder,simplify=require("../util").simplify;module.exports=networkSimplex;networkSimplex.initLowLimValues=initLowLimValues;networkSimplex.initCutValues=initCutValues;networkSimplex.calcCutValue=calcCutValue;networkSimplex.leaveEdge=leaveEdge;networkSimplex.enterEdge=enterEdge;networkSimplex.exchangeEdges=exchangeEdges;function networkSimplex(g){g=simplify(g);initRank(g);var t=feasibleTree(g);initLowLimValues(t);initCutValues(t,g);var e,f;while(e=leaveEdge(t)){f=enterEdge(t,g,e);exchangeEdges(t,g,e,f)}}function initCutValues(t,g){var vs=postorder(t,t.nodes());vs=vs.slice(0,vs.length-1);_.each(vs,function(v){assignCutValue(t,g,v)})}function assignCutValue(t,g,child){var childLab=t.node(child),parent=childLab.parent;t.edge(child,parent).cutvalue=calcCutValue(t,g,child)}function calcCutValue(t,g,child){var childLab=t.node(child),parent=childLab.parent,childIsTail=true,graphEdge=g.edge(child,parent),cutValue=0;if(!graphEdge){childIsTail=false;graphEdge=g.edge(parent,child)}cutValue=graphEdge.weight;_.each(g.nodeEdges(child),function(e){var isOutEdge=e.v===child,other=isOutEdge?e.w:e.v;if(other!==parent){var pointsToHead=isOutEdge===childIsTail,otherWeight=g.edge(e).weight;cutValue+=pointsToHead?otherWeight:-otherWeight;if(isTreeEdge(t,child,other)){var otherCutValue=t.edge(child,other).cutvalue;cutValue+=pointsToHead?-otherCutValue:otherCutValue}}});return cutValue}function initLowLimValues(tree,root){if(arguments.length<2){root=tree.nodes()[0]}dfsAssignLowLim(tree,{},1,root)}function dfsAssignLowLim(tree,visited,nextLim,v,parent){var low=nextLim,label=tree.node(v);visited[v]=true;_.each(tree.neighbors(v),function(w){if(!_.has(visited,w)){nextLim=dfsAssignLowLim(tree,visited,nextLim,w,v)}});label.low=low;label.lim=nextLim++;if(parent){label.parent=parent}else{delete label.parent}return nextLim}function leaveEdge(tree){return _.find(tree.edges(),function(e){return tree.edge(e).cutvalue<0})}function enterEdge(t,g,edge){var v=edge.v,w=edge.w; -_.each(inputGraph.nodes(),function(v){var node=canonicalize(inputGraph.node(v));g.setNode(v,_.defaults(selectNumberAttrs(node,nodeNumAttrs),nodeDefaults));g.setParent(v,inputGraph.parent(v))});_.each(inputGraph.edges(),function(e){var edge=canonicalize(inputGraph.edge(e));g.setEdge(e,_.merge({},edgeDefaults,selectNumberAttrs(edge,edgeNumAttrs),_.pick(edge,edgeAttrs)))});return g}function makeSpaceForEdgeLabels(g){var graph=g.graph();graph.ranksep/=2;_.each(g.edges(),function(e){var 
edge=g.edge(e);edge.minlen*=2;if(edge.labelpos.toLowerCase()!=="c"){if(graph.rankdir==="TB"||graph.rankdir==="BT"){edge.width+=edge.labeloffset}else{edge.height+=edge.labeloffset}}})}function injectEdgeLabelProxies(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.width&&edge.height){var v=g.node(e.v),w=g.node(e.w),label={rank:(w.rank-v.rank)/2+v.rank,e:e};util.addDummyNode(g,"edge-proxy",label,"_ep")}})}function assignRankMinMax(g){var maxRank=0;_.each(g.nodes(),function(v){var node=g.node(v);if(node.borderTop){node.minRank=g.node(node.borderTop).rank;node.maxRank=g.node(node.borderBottom).rank;maxRank=_.max(maxRank,node.maxRank)}});g.graph().maxRank=maxRank}function removeEdgeLabelProxies(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="edge-proxy"){g.edge(node.e).labelRank=node.rank;g.removeNode(v)}})}function translateGraph(g){var minX=Number.POSITIVE_INFINITY,maxX=0,minY=Number.POSITIVE_INFINITY,maxY=0,graphLabel=g.graph(),marginX=graphLabel.marginx||0,marginY=graphLabel.marginy||0;function getExtremes(attrs){var x=attrs.x,y=attrs.y,w=attrs.width,h=attrs.height;minX=Math.min(minX,x-w/2);maxX=Math.max(maxX,x+w/2);minY=Math.min(minY,y-h/2);maxY=Math.max(maxY,y+h/2)}_.each(g.nodes(),function(v){getExtremes(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){getExtremes(edge)}});minX-=marginX;minY-=marginY;_.each(g.nodes(),function(v){var node=g.node(v);node.x-=minX;node.y-=minY});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,function(p){p.x-=minX;p.y-=minY});if(_.has(edge,"x")){edge.x-=minX}if(_.has(edge,"y")){edge.y-=minY}});graphLabel.width=maxX-minX+marginX;graphLabel.height=maxY-minY+marginY}function assignNodeIntersects(g){_.each(g.edges(),function(e){var edge=g.edge(e),nodeV=g.node(e.v),nodeW=g.node(e.w),p1,p2;if(!edge.points){edge.points=[];p1=nodeW;p2=nodeV}else{p1=edge.points[0];p2=edge.points[edge.points.length-1]}edge.points.unshift(util.intersectRect(nodeV,p1));edge.points.push(util.intersectRect(nodeW,p2))})}function fixupEdgeLabelCoords(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){if(edge.labelpos==="l"||edge.labelpos==="r"){edge.width-=edge.labeloffset}switch(edge.labelpos){case"l":edge.x-=edge.width/2+edge.labeloffset;break;case"r":edge.x+=edge.width/2+edge.labeloffset;break}}})}function reversePointsForReversedEdges(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.reversed){edge.points.reverse()}})}function removeBorderNodes(g){_.each(g.nodes(),function(v){if(g.children(v).length){var node=g.node(v),t=g.node(node.borderTop),b=g.node(node.borderBottom),l=g.node(_.last(node.borderLeft)),r=g.node(_.last(node.borderRight));node.width=Math.abs(r.x-l.x);node.height=Math.abs(b.y-t.y);node.x=l.x+node.width/2;node.y=t.y+node.height/2}});_.each(g.nodes(),function(v){if(g.node(v).dummy==="border"){g.removeNode(v)}})}function removeSelfEdges(g){_.each(g.edges(),function(e){if(e.v===e.w){var node=g.node(e.v);if(!node.selfEdges){node.selfEdges=[]}node.selfEdges.push({e:e,label:g.edge(e)});g.removeEdge(e)}})}function insertSelfEdges(g){var layers=util.buildLayerMatrix(g);_.each(layers,function(layer){var orderShift=0;_.each(layer,function(v,i){var node=g.node(v);node.order=i+orderShift;_.each(node.selfEdges,function(selfEdge){util.addDummyNode(g,"selfedge",{width:selfEdge.label.width,height:selfEdge.label.height,rank:node.rank,order:i+ ++orderShift,e:selfEdge.e,label:selfEdge.label},"_se")});delete node.selfEdges})})}function 
positionSelfEdges(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="selfedge"){var selfNode=g.node(node.e.v),x=selfNode.x+selfNode.width/2,y=selfNode.y,dx=node.x-x,dy=selfNode.height/2;g.setEdge(node.e,node.label);g.removeNode(v);node.label.points=[{x:x+2*dx/3,y:y-dy},{x:x+5*dx/6,y:y-dy},{x:x+dx,y:y},{x:x+5*dx/6,y:y+dy},{x:x+2*dx/3,y:y+dy}];node.label.x=node.x;node.label.y=node.y}})}function selectNumberAttrs(obj,attrs){return _.mapValues(_.pick(obj,attrs),Number)}function canonicalize(attrs){var newAttrs={};_.each(attrs,function(v,k){newAttrs[k.toLowerCase()]=v});return newAttrs}},{"./acyclic":28,"./add-border-segments":29,"./coordinate-system":30,"./graphlib":33,"./lodash":36,"./nesting-graph":37,"./normalize":38,"./order":43,"./parent-dummy-chains":48,"./position":50,"./rank":52,"./util":55}],36:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],37:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports={run:run,cleanup:cleanup};function run(g){var root=util.addDummyNode(g,"root",{},"_root"),depths=treeDepths(g),height=_.max(depths)-1,nodeSep=2*height+1;g.graph().nestingRoot=root;_.each(g.edges(),function(e){g.edge(e).minlen*=nodeSep});var weight=sumWeights(g)+1;_.each(g.children(),function(child){dfs(g,root,nodeSep,weight,height,depths,child)});g.graph().nodeRankFactor=nodeSep}function dfs(g,root,nodeSep,weight,height,depths,v){var children=g.children(v);if(!children.length){if(v!==root){g.setEdge(root,v,{weight:0,minlen:nodeSep})}return}var top=util.addBorderNode(g,"_bt"),bottom=util.addBorderNode(g,"_bb"),label=g.node(v);g.setParent(top,v);label.borderTop=top;g.setParent(bottom,v);label.borderBottom=bottom;_.each(children,function(child){dfs(g,root,nodeSep,weight,height,depths,child);var childNode=g.node(child),childTop=childNode.borderTop?childNode.borderTop:child,childBottom=childNode.borderBottom?childNode.borderBottom:child,thisWeight=childNode.borderTop?weight:2*weight,minlen=childTop!==childBottom?1:height-depths[v]+1;g.setEdge(top,childTop,{weight:thisWeight,minlen:minlen,nestingEdge:true});g.setEdge(childBottom,bottom,{weight:thisWeight,minlen:minlen,nestingEdge:true})});if(!g.parent(v)){g.setEdge(root,top,{weight:0,minlen:height+depths[v]})}}function treeDepths(g){var depths={};function dfs(v,depth){var children=g.children(v);if(children&&children.length){_.each(children,function(child){dfs(child,depth+1)})}depths[v]=depth}_.each(g.children(),function(v){dfs(v,1)});return depths}function sumWeights(g){return _.reduce(g.edges(),function(acc,e){return acc+g.edge(e).weight},0)}function cleanup(g){var graphLabel=g.graph();g.removeNode(graphLabel.nestingRoot);delete graphLabel.nestingRoot;_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.nestingEdge){g.removeEdge(e)}})}},{"./lodash":36,"./util":55}],38:[function(require,module,exports){"use strict";var _=require("./lodash"),util=require("./util");module.exports={run:run,undo:undo};function run(g){g.graph().dummyChains=[];_.each(g.edges(),function(edge){normalizeEdge(g,edge)})}function normalizeEdge(g,e){var v=e.v,vRank=g.node(v).rank,w=e.w,wRank=g.node(w).rank,name=e.name,edgeLabel=g.edge(e),labelRank=edgeLabel.labelRank;if(wRank===vRank+1)return;g.removeEdge(e);var 
dummy,attrs,i;for(i=0,++vRank;vRank<wRank;++i,++vRank){edgeLabel.points=[];attrs={width:0,height:0,edgeLabel:edgeLabel,edgeObj:e,rank:vRank};dummy=util.addDummyNode(g,"edge",attrs,"_d");if(vRank===labelRank){attrs.width=edgeLabel.width;attrs.height=edgeLabel.height;attrs.dummy="edge-label";attrs.labelpos=edgeLabel.labelpos}g.setEdge(v,dummy,{weight:edgeLabel.weight},name);if(i===0){g.graph().dummyChains.push(dummy)}v=dummy}g.setEdge(v,w,{weight:edgeLabel.weight},name)}function undo(g){_.each(g.graph().dummyChains,function(v){var node=g.node(v),origLabel=node.edgeLabel,w;g.setEdge(node.edgeObj,origLabel);while(node.dummy){w=g.successors(v)[0];g.removeNode(v);origLabel.points.push({x:node.x,y:node.y});if(node.dummy==="edge-label"){origLabel.x=node.x;origLabel.y=node.y;origLabel.width=node.width;origLabel.height=node.height}v=w;node=g.node(v)}})}},{"./lodash":36,"./util":55}],39:[function(require,module,exports){var _=require("../lodash");module.exports=addSubgraphConstraints;function addSubgraphConstraints(g,cg,vs){var prev={},rootPrev;_.each(vs,function(v){var child=g.parent(v),parent,prevChild;while(child){parent=g.parent(child);if(parent){prevChild=prev[parent];prev[parent]=child}else{prevChild=rootPrev;rootPrev=child}if(prevChild&&prevChild!==child){cg.setEdge(prevChild,child);return}child=parent}})}},{"../lodash":36}],40:[function(require,module,exports){var _=require("../lodash");module.exports=barycenter;function barycenter(g,movable){return _.map(movable,function(v){var inV=g.inEdges(v);if(!inV.length){return{v:v}}else{var result=_.reduce(inV,function(acc,e){var edge=g.edge(e),nodeU=g.node(e.v);return{sum:acc.sum+edge.weight*nodeU.order,weight:acc.weight+edge.weight}},{sum:0,weight:0});return{v:v,barycenter:result.sum/result.weight,weight:result.weight}}})}},{"../lodash":36}],41:[function(require,module,exports){var _=require("../lodash"),Graph=require("../graphlib").Graph;module.exports=buildLayerGraph;function buildLayerGraph(g,rank,relationship){var root=createRootNode(g),result=new Graph({compound:true}).setGraph({root:root}).setDefaultNodeLabel(function(v){return g.node(v)});_.each(g.nodes(),function(v){var node=g.node(v),parent=g.parent(v);if(node.rank===rank||node.minRank<=rank&&rank<=node.maxRank){result.setNode(v);result.setParent(v,parent||root);_.each(g[relationship](v),function(e){var u=e.v===v?e.w:e.v,edge=result.edge(u,v),weight=!_.isUndefined(edge)?edge.weight:0;result.setEdge(u,v,{weight:g.edge(e).weight+weight})});if(_.has(node,"minRank")){result.setNode(v,{borderLeft:node.borderLeft[rank],borderRight:node.borderRight[rank]})}}});return result}function createRootNode(g){var v;while(g.hasNode(v=_.uniqueId("_root")));return v}},{"../graphlib":33,"../lodash":36}],42:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=crossCount;function crossCount(g,layering){var cc=0;for(var i=1;i<layering.length;++i){cc+=twoLayerCrossCount(g,layering[i-1],layering[i])}return cc}function twoLayerCrossCount(g,northLayer,southLayer){var southPos=_.zipObject(southLayer,_.map(southLayer,function(v,i){return i}));var southEntries=_.flatten(_.map(northLayer,function(v){return _.chain(g.outEdges(v)).map(function(e){return{pos:southPos[e.w],weight:g.edge(e).weight}}).sortBy("pos").value()}),true);var firstIndex=1;while(firstIndex<southLayer.length)firstIndex<<=1;var treeSize=2*firstIndex-1;firstIndex-=1;var tree=_.map(new Array(treeSize),function(){return 0});var cc=0;_.each(southEntries.forEach(function(entry){var 
index=entry.pos+firstIndex;tree[index]+=entry.weight;var weightSum=0;while(index>0){if(index%2){weightSum+=tree[index+1]}index=index-1>>1;tree[index]+=entry.weight}cc+=entry.weight*weightSum}));return cc}},{"../lodash":36}],43:[function(require,module,exports){"use strict";var _=require("../lodash"),initOrder=require("./init-order"),crossCount=require("./cross-count"),sortSubgraph=require("./sort-subgraph"),buildLayerGraph=require("./build-layer-graph"),addSubgraphConstraints=require("./add-subgraph-constraints"),Graph=require("../graphlib").Graph,util=require("../util");module.exports=order;function order(g){var maxRank=util.maxRank(g),downLayerGraphs=buildLayerGraphs(g,_.range(1,maxRank+1),"inEdges"),upLayerGraphs=buildLayerGraphs(g,_.range(maxRank-1,-1,-1),"outEdges");var layering=initOrder(g);assignOrder(g,layering);var bestCC=Number.POSITIVE_INFINITY,best;for(var i=0,lastBest=0;lastBest<4;++i,++lastBest){sweepLayerGraphs(i%2?downLayerGraphs:upLayerGraphs,i%4>=2);layering=util.buildLayerMatrix(g);var cc=crossCount(g,layering);if(cc<bestCC){lastBest=0;best=_.cloneDeep(layering);bestCC=cc}}assignOrder(g,best)}function buildLayerGraphs(g,ranks,relationship){return _.map(ranks,function(rank){return buildLayerGraph(g,rank,relationship)})}function sweepLayerGraphs(layerGraphs,biasRight){var cg=new Graph;_.each(layerGraphs,function(lg){var root=lg.graph().root;var sorted=sortSubgraph(lg,root,cg,biasRight);_.each(sorted.vs,function(v,i){lg.node(v).order=i});addSubgraphConstraints(lg,cg,sorted.vs)})}function assignOrder(g,layering){_.each(layering,function(layer){_.each(layer,function(v,i){g.node(v).order=i})})}},{"../graphlib":33,"../lodash":36,"../util":55,"./add-subgraph-constraints":39,"./build-layer-graph":41,"./cross-count":42,"./init-order":44,"./sort-subgraph":46}],44:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=initOrder;function initOrder(g){var visited={},simpleNodes=_.filter(g.nodes(),function(v){return!g.children(v).length}),maxRank=_.max(_.map(simpleNodes,function(v){return g.node(v).rank})),layers=_.map(_.range(maxRank+1),function(){return[]});function dfs(v){if(_.has(visited,v))return;visited[v]=true;var node=g.node(v);layers[node.rank].push(v);_.each(g.successors(v),dfs)}var orderedVs=_.sortBy(simpleNodes,function(v){return g.node(v).rank});_.each(orderedVs,dfs);return layers}},{"../lodash":36}],45:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=resolveConflicts;function resolveConflicts(entries,cg){var mappedEntries={};_.each(entries,function(entry,i){var tmp=mappedEntries[entry.v]={indegree:0,"in":[],out:[],vs:[entry.v],i:i};if(!_.isUndefined(entry.barycenter)){tmp.barycenter=entry.barycenter;tmp.weight=entry.weight}});_.each(cg.edges(),function(e){var entryV=mappedEntries[e.v],entryW=mappedEntries[e.w];if(!_.isUndefined(entryV)&&!_.isUndefined(entryW)){entryW.indegree++;entryV.out.push(mappedEntries[e.w])}});var sourceSet=_.filter(mappedEntries,function(entry){return!entry.indegree});return doResolveConflicts(sourceSet)}function doResolveConflicts(sourceSet){var entries=[];function handleIn(vEntry){return function(uEntry){if(uEntry.merged){return}if(_.isUndefined(uEntry.barycenter)||_.isUndefined(vEntry.barycenter)||uEntry.barycenter>=vEntry.barycenter){mergeEntries(vEntry,uEntry)}}}function handleOut(vEntry){return function(wEntry){wEntry["in"].push(vEntry);if(--wEntry.indegree===0){sourceSet.push(wEntry)}}}while(sourceSet.length){var 
entry=sourceSet.pop();entries.push(entry);_.each(entry["in"].reverse(),handleIn(entry));_.each(entry.out,handleOut(entry))}return _.chain(entries).filter(function(entry){return!entry.merged}).map(function(entry){return _.pick(entry,["vs","i","barycenter","weight"])}).value()}function mergeEntries(target,source){var sum=0,weight=0;if(target.weight){sum+=target.barycenter*target.weight;weight+=target.weight}if(source.weight){sum+=source.barycenter*source.weight;weight+=source.weight}target.vs=source.vs.concat(target.vs);target.barycenter=sum/weight;target.weight=weight;target.i=Math.min(source.i,target.i);source.merged=true}},{"../lodash":36}],46:[function(require,module,exports){var _=require("../lodash"),barycenter=require("./barycenter"),resolveConflicts=require("./resolve-conflicts"),sort=require("./sort");module.exports=sortSubgraph;function sortSubgraph(g,v,cg,biasRight){var movable=g.children(v),node=g.node(v),bl=node?node.borderLeft:undefined,br=node?node.borderRight:undefined,subgraphs={};if(bl){movable=_.filter(movable,function(w){return w!==bl&&w!==br})}var barycenters=barycenter(g,movable);_.each(barycenters,function(entry){if(g.children(entry.v).length){var subgraphResult=sortSubgraph(g,entry.v,cg,biasRight);subgraphs[entry.v]=subgraphResult;if(_.has(subgraphResult,"barycenter")){mergeBarycenters(entry,subgraphResult)}}});var entries=resolveConflicts(barycenters,cg);expandSubgraphs(entries,subgraphs);var result=sort(entries,biasRight);if(bl){result.vs=_.flatten([bl,result.vs,br],true);if(g.predecessors(bl).length){var blPred=g.node(g.predecessors(bl)[0]),brPred=g.node(g.predecessors(br)[0]);if(!_.has(result,"barycenter")){result.barycenter=0;result.weight=0}result.barycenter=(result.barycenter*result.weight+blPred.order+brPred.order)/(result.weight+2);result.weight+=2}}return result}function expandSubgraphs(entries,subgraphs){_.each(entries,function(entry){entry.vs=_.flatten(entry.vs.map(function(v){if(subgraphs[v]){return subgraphs[v].vs}return v}),true)})}function mergeBarycenters(target,other){if(!_.isUndefined(target.barycenter)){target.barycenter=(target.barycenter*target.weight+other.barycenter*other.weight)/(target.weight+other.weight);target.weight+=other.weight}else{target.barycenter=other.barycenter;target.weight=other.weight}}},{"../lodash":36,"./barycenter":40,"./resolve-conflicts":45,"./sort":47}],47:[function(require,module,exports){var _=require("../lodash"),util=require("../util");module.exports=sort;function sort(entries,biasRight){var parts=util.partition(entries,function(entry){return _.has(entry,"barycenter")});var sortable=parts.lhs,unsortable=_.sortBy(parts.rhs,function(entry){return-entry.i}),vs=[],sum=0,weight=0,vsIndex=0;sortable.sort(compareWithBias(!!biasRight));vsIndex=consumeUnsortable(vs,unsortable,vsIndex);_.each(sortable,function(entry){vsIndex+=entry.vs.length;vs.push(entry.vs);sum+=entry.barycenter*entry.weight;weight+=entry.weight;vsIndex=consumeUnsortable(vs,unsortable,vsIndex)});var result={vs:_.flatten(vs,true)};if(weight){result.barycenter=sum/weight;result.weight=weight}return result}function consumeUnsortable(vs,unsortable,index){var last;while(unsortable.length&&(last=_.last(unsortable)).i<=index){unsortable.pop();vs.push(last.vs);index++}return index}function compareWithBias(bias){return function(entryV,entryW){if(entryV.barycenter<entryW.barycenter){return-1}else if(entryV.barycenter>entryW.barycenter){return 1}return!bias?entryV.i-entryW.i:entryW.i-entryV.i}}},{"../lodash":36,"../util":55}],48:[function(require,module,exports){var 
_=require("./lodash");module.exports=parentDummyChains;function parentDummyChains(g){var postorderNums=postorder(g);_.each(g.graph().dummyChains,function(v){var node=g.node(v),edgeObj=node.edgeObj,pathData=findPath(g,postorderNums,edgeObj.v,edgeObj.w),path=pathData.path,lca=pathData.lca,pathIdx=0,pathV=path[pathIdx],ascending=true;while(v!==edgeObj.w){node=g.node(v);if(ascending){while((pathV=path[pathIdx])!==lca&&g.node(pathV).maxRank<node.rank){pathIdx++}if(pathV===lca){ascending=false}}if(!ascending){while(pathIdx<path.length-1&&g.node(pathV=path[pathIdx+1]).minRank<=node.rank){pathIdx++}pathV=path[pathIdx]}g.setParent(v,pathV);v=g.successors(v)[0]}})}function findPath(g,postorderNums,v,w){var vPath=[],wPath=[],low=Math.min(postorderNums[v].low,postorderNums[w].low),lim=Math.max(postorderNums[v].lim,postorderNums[w].lim),parent,lca;parent=v;do{parent=g.parent(parent);vPath.push(parent)}while(parent&&(postorderNums[parent].low>low||lim>postorderNums[parent].lim));lca=parent;parent=w;while((parent=g.parent(parent))!==lca){wPath.push(parent)}return{path:vPath.concat(wPath.reverse()),lca:lca}}function postorder(g){var result={},lim=0;function dfs(v){var low=lim;_.each(g.children(v),dfs);result[v]={low:low,lim:lim++}}_.each(g.children(),dfs);return result}},{"./lodash":36}],49:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,util=require("../util");module.exports={positionX:positionX,findType1Conflicts:findType1Conflicts,findType2Conflicts:findType2Conflicts,addConflict:addConflict,hasConflict:hasConflict,verticalAlignment:verticalAlignment,horizontalCompaction:horizontalCompaction,alignCoordinates:alignCoordinates,findSmallestWidthAlignment:findSmallestWidthAlignment,balance:balance};function findType1Conflicts(g,layering){var conflicts={};function visitLayer(prevLayer,layer){var k0=0,scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.each(layer,function(v,i){var w=findOtherInnerSegmentNode(g,v),k1=w?g.node(w).order:prevLayerLength;if(w||v===lastNode){_.each(layer.slice(scanPos,i+1),function(scanNode){_.each(g.predecessors(scanNode),function(u){var uLabel=g.node(u),uPos=uLabel.order;if((uPos<k0||k1<uPos)&&!(uLabel.dummy&&g.node(scanNode).dummy)){addConflict(conflicts,u,scanNode)}})});scanPos=i+1;k0=k1}});return layer}_.reduce(layering,visitLayer);return conflicts}function findType2Conflicts(g,layering){var conflicts={};function scan(south,southPos,southEnd,prevNorthBorder,nextNorthBorder){var v;_.each(_.range(southPos,southEnd),function(i){v=south[i];if(g.node(v).dummy){_.each(g.predecessors(v),function(u){var uNode=g.node(u);if(uNode.dummy&&(uNode.order<prevNorthBorder||uNode.order>nextNorthBorder)){addConflict(conflicts,u,v)}})}})}function visitLayer(north,south){var prevNorthPos=-1,nextNorthPos,southPos=0;_.each(south,function(v,southLookahead){if(g.node(v).dummy==="border"){var predecessors=g.predecessors(v);if(predecessors.length){nextNorthPos=g.node(predecessors[0]).order;scan(south,southPos,southLookahead,prevNorthPos,nextNorthPos);southPos=southLookahead;prevNorthPos=nextNorthPos}}scan(south,southPos,south.length,nextNorthPos,north.length)});return south}_.reduce(layering,visitLayer);return conflicts}function findOtherInnerSegmentNode(g,v){if(g.node(v).dummy){return _.find(g.predecessors(v),function(u){return g.node(u).dummy})}}function addConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}var conflictsV=conflicts[v];if(!conflictsV){conflicts[v]=conflictsV={}}conflictsV[w]=true}function 
hasConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}return _.has(conflicts[v],w)}function verticalAlignment(g,layering,conflicts,neighborFn){var root={},align={},pos={};_.each(layering,function(layer){_.each(layer,function(v,order){root[v]=v;align[v]=v;pos[v]=order})});_.each(layering,function(layer){var prevIdx=-1;_.each(layer,function(v){var ws=neighborFn(v);if(ws.length){ws=_.sortBy(ws,function(w){return pos[w]});var mp=(ws.length-1)/2;for(var i=Math.floor(mp),il=Math.ceil(mp);i<=il;++i){var w=ws[i];if(align[v]===v&&prevIdx<pos[w]&&!hasConflict(conflicts,v,w)){align[w]=v;align[v]=root[v]=root[w];prevIdx=pos[w]}}}})});return{root:root,align:align}}function horizontalCompaction(g,layering,root,align,reverseSep){var xs={},blockG=buildBlockGraph(g,layering,root,reverseSep);var visited={};function pass1(v){if(!_.has(visited,v)){visited[v]=true;xs[v]=_.reduce(blockG.inEdges(v),function(max,e){pass1(e.v);return Math.max(max,xs[e.v]+blockG.edge(e))},0)}}_.each(blockG.nodes(),pass1);function pass2(v){if(visited[v]!==2){visited[v]++;var min=_.reduce(blockG.outEdges(v),function(min,e){pass2(e.w);return Math.min(min,xs[e.w]-blockG.edge(e))},Number.POSITIVE_INFINITY);if(min!==Number.POSITIVE_INFINITY){xs[v]=Math.max(xs[v],min)}}}_.each(blockG.nodes(),pass2);_.each(align,function(v){xs[v]=xs[root[v]]});return xs}function buildBlockGraph(g,layering,root,reverseSep){var blockGraph=new Graph,graphLabel=g.graph(),sepFn=sep(graphLabel.nodesep,graphLabel.edgesep,reverseSep);_.each(layering,function(layer){var u;_.each(layer,function(v){var vRoot=root[v];blockGraph.setNode(vRoot);if(u){var uRoot=root[u],prevMax=blockGraph.edge(uRoot,vRoot);blockGraph.setEdge(uRoot,vRoot,Math.max(sepFn(g,v,u),prevMax||0))}u=v})});return blockGraph}function findSmallestWidthAlignment(g,xss){return _.min(xss,function(xs){var min=_.min(xs,function(x,v){return x-width(g,v)/2}),max=_.max(xs,function(x,v){return x+width(g,v)/2});return max-min})}function alignCoordinates(xss,alignTo){var alignToMin=_.min(alignTo),alignToMax=_.max(alignTo);_.each(["u","d"],function(vert){_.each(["l","r"],function(horiz){var alignment=vert+horiz,xs=xss[alignment],delta;if(xs===alignTo)return;delta=horiz==="l"?alignToMin-_.min(xs):alignToMax-_.max(xs);if(delta){xss[alignment]=_.mapValues(xs,function(x){return x+delta})}})})}function balance(xss,align){return _.mapValues(xss.ul,function(ignore,v){if(align){return xss[align.toLowerCase()][v]}else{var xs=_.sortBy(_.pluck(xss,v));return(xs[1]+xs[2])/2}})}function positionX(g){var layering=util.buildLayerMatrix(g),conflicts=_.merge(findType1Conflicts(g,layering),findType2Conflicts(g,layering));var xss={},adjustedLayering;_.each(["u","d"],function(vert){adjustedLayering=vert==="u"?layering:_.values(layering).reverse();_.each(["l","r"],function(horiz){if(horiz==="r"){adjustedLayering=_.map(adjustedLayering,function(inner){return _.values(inner).reverse()})}var neighborFn=_.bind(vert==="u"?g.predecessors:g.successors,g);var align=verticalAlignment(g,adjustedLayering,conflicts,neighborFn);var xs=horizontalCompaction(g,adjustedLayering,align.root,align.align,horiz==="r");if(horiz==="r"){xs=_.mapValues(xs,function(x){return-x})}xss[vert+horiz]=xs})});var smallestWidth=findSmallestWidthAlignment(g,xss);alignCoordinates(xss,smallestWidth);return balance(xss,g.graph().align)}function sep(nodeSep,edgeSep,reverseSep){return function(g,v,w){var 
vLabel=g.node(v),wLabel=g.node(w),sum=0,delta;sum+=vLabel.width/2;if(_.has(vLabel,"labelpos")){switch(vLabel.labelpos.toLowerCase()){case"l":delta=-vLabel.width/2;break;case"r":delta=vLabel.width/2;break}}if(delta){sum+=reverseSep?delta:-delta}delta=0;sum+=(vLabel.dummy?edgeSep:nodeSep)/2;sum+=(wLabel.dummy?edgeSep:nodeSep)/2;sum+=wLabel.width/2;if(_.has(wLabel,"labelpos")){switch(wLabel.labelpos.toLowerCase()){case"l":delta=wLabel.width/2;break;case"r":delta=-wLabel.width/2;break}}if(delta){sum+=reverseSep?delta:-delta}delta=0;return sum}}function width(g,v){return g.node(v).width}},{"../graphlib":33,"../lodash":36,"../util":55}],50:[function(require,module,exports){"use strict";var _=require("../lodash"),util=require("../util"),positionX=require("./bk").positionX;module.exports=position;function position(g){g=util.asNonCompoundGraph(g);positionY(g);_.each(positionX(g),function(x,v){g.node(v).x=x})}function positionY(g){var layering=util.buildLayerMatrix(g),rankSep=g.graph().ranksep,prevY=0;_.each(layering,function(layer){var maxHeight=_.max(_.map(layer,function(v){return g.node(v).height}));_.each(layer,function(v){g.node(v).y=prevY+maxHeight/2});prevY+=maxHeight+rankSep})}},{"../lodash":36,"../util":55,"./bk":49}],51:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,slack=require("./util").slack;module.exports=feasibleTree;function feasibleTree(g){var t=new Graph({directed:false});var start=g.nodes()[0],size=g.nodeCount();t.setNode(start,{});var edge,delta;while(tightTree(t,g)<size){edge=findMinSlackEdge(t,g);delta=t.hasNode(edge.v)?slack(g,edge):-slack(g,edge);shiftRanks(t,g,delta)}return t}function tightTree(t,g){function dfs(v){_.each(g.nodeEdges(v),function(e){var edgeV=e.v,w=v===edgeV?e.w:edgeV;if(!t.hasNode(w)&&!slack(g,e)){t.setNode(w,{});t.setEdge(v,w,{});dfs(w)}})}_.each(t.nodes(),dfs);return t.nodeCount()}function findMinSlackEdge(t,g){return _.min(g.edges(),function(e){if(t.hasNode(e.v)!==t.hasNode(e.w)){return slack(g,e)}})}function shiftRanks(t,g,delta){_.each(t.nodes(),function(v){g.node(v).rank+=delta})}},{"../graphlib":33,"../lodash":36,"./util":54}],52:[function(require,module,exports){"use strict";var rankUtil=require("./util"),longestPath=rankUtil.longestPath,feasibleTree=require("./feasible-tree"),networkSimplex=require("./network-simplex");module.exports=rank;function rank(g){switch(g.graph().ranker){case"network-simplex":networkSimplexRanker(g);break;case"tight-tree":tightTreeRanker(g);break;case"longest-path":longestPathRanker(g);break;default:networkSimplexRanker(g)}}var longestPathRanker=longestPath;function tightTreeRanker(g){longestPath(g);feasibleTree(g)}function networkSimplexRanker(g){networkSimplex(g)}},{"./feasible-tree":51,"./network-simplex":53,"./util":54}],53:[function(require,module,exports){"use strict";var _=require("../lodash"),feasibleTree=require("./feasible-tree"),slack=require("./util").slack,initRank=require("./util").longestPath,preorder=require("../graphlib").alg.preorder,postorder=require("../graphlib").alg.postorder,simplify=require("../util").simplify;module.exports=networkSimplex;networkSimplex.initLowLimValues=initLowLimValues;networkSimplex.initCutValues=initCutValues;networkSimplex.calcCutValue=calcCutValue;networkSimplex.leaveEdge=leaveEdge;networkSimplex.enterEdge=enterEdge;networkSimplex.exchangeEdges=exchangeEdges;function networkSimplex(g){g=simplify(g);initRank(g);var t=feasibleTree(g);initLowLimValues(t);initCutValues(t,g);var 
e,f;while(e=leaveEdge(t)){f=enterEdge(t,g,e);exchangeEdges(t,g,e,f)}}function initCutValues(t,g){var vs=postorder(t,t.nodes());vs=vs.slice(0,vs.length-1);_.each(vs,function(v){assignCutValue(t,g,v)})}function assignCutValue(t,g,child){var childLab=t.node(child),parent=childLab.parent;t.edge(child,parent).cutvalue=calcCutValue(t,g,child)}function calcCutValue(t,g,child){var childLab=t.node(child),parent=childLab.parent,childIsTail=true,graphEdge=g.edge(child,parent),cutValue=0;if(!graphEdge){childIsTail=false;graphEdge=g.edge(parent,child)}cutValue=graphEdge.weight;_.each(g.nodeEdges(child),function(e){var isOutEdge=e.v===child,other=isOutEdge?e.w:e.v;if(other!==parent){var pointsToHead=isOutEdge===childIsTail,otherWeight=g.edge(e).weight;cutValue+=pointsToHead?otherWeight:-otherWeight;if(isTreeEdge(t,child,other)){var otherCutValue=t.edge(child,other).cutvalue;cutValue+=pointsToHead?-otherCutValue:otherCutValue}}});return cutValue}function initLowLimValues(tree,root){if(arguments.length<2){root=tree.nodes()[0]}dfsAssignLowLim(tree,{},1,root)}function dfsAssignLowLim(tree,visited,nextLim,v,parent){var low=nextLim,label=tree.node(v);visited[v]=true;_.each(tree.neighbors(v),function(w){if(!_.has(visited,w)){nextLim=dfsAssignLowLim(tree,visited,nextLim,w,v)}});label.low=low;label.lim=nextLim++;if(parent){label.parent=parent}else{delete label.parent}return nextLim}function leaveEdge(tree){return _.find(tree.edges(),function(e){return tree.edge(e).cutvalue<0})}function enterEdge(t,g,edge){var v=edge.v,w=edge.w;if(!g.hasEdge(v,w)){v=edge.w;w=edge.v}var vLabel=t.node(v),wLabel=t.node(w),tailLabel=vLabel,flip=false;if(vLabel.lim>wLabel.lim){tailLabel=wLabel;flip=true}var candidates=_.filter(g.edges(),function(edge){return flip===isDescendant(t,t.node(edge.v),tailLabel)&&flip!==isDescendant(t,t.node(edge.w),tailLabel)});return _.min(candidates,function(edge){return slack(g,edge)})}function exchangeEdges(t,g,e,f){var v=e.v,w=e.w;t.removeEdge(v,w);t.setEdge(f.v,f.w,{});initLowLimValues(t);initCutValues(t,g);updateRanks(t,g)}function updateRanks(t,g){var root=_.find(t.nodes(),function(v){return!g.node(v).parent}),vs=preorder(t,root);vs=vs.slice(1);_.each(vs,function(v){var parent=t.node(v).parent,edge=g.edge(v,parent),flipped=false;if(!edge){edge=g.edge(parent,v);flipped=true}g.node(v).rank=g.node(parent).rank+(flipped?edge.minlen:-edge.minlen)})}function isTreeEdge(tree,u,v){return tree.hasEdge(u,v)}function isDescendant(tree,vLabel,rootLabel){return rootLabel.low<=vLabel.lim&&vLabel.lim<=rootLabel.lim}},{"../graphlib":33,"../lodash":36,"../util":55,"./feasible-tree":51,"./util":54}],54:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports={longestPath:longestPath,slack:slack};function longestPath(g){var visited={};function dfs(v){var label=g.node(v);if(_.has(visited,v)){return label.rank}visited[v]=true;var rank=_.min(_.map(g.outEdges(v),function(e){return dfs(e.w)-g.edge(e).minlen}));if(rank===Number.POSITIVE_INFINITY){rank=0}return label.rank=rank}_.each(g.sources(),dfs)}function slack(g,e){return g.node(e.w).rank-g.node(e.v).rank-g.edge(e).minlen}},{"../lodash":36}],55:[function(require,module,exports){"use strict";var 
_=require("./lodash"),Graph=require("./graphlib").Graph;module.exports={addDummyNode:addDummyNode,simplify:simplify,asNonCompoundGraph:asNonCompoundGraph,successorWeights:successorWeights,predecessorWeights:predecessorWeights,intersectRect:intersectRect,buildLayerMatrix:buildLayerMatrix,normalizeRanks:normalizeRanks,removeEmptyRanks:removeEmptyRanks,addBorderNode:addBorderNode,maxRank:maxRank,partition:partition,time:time,notime:notime};function addDummyNode(g,type,attrs,name){var v; +if(!g.hasEdge(v,w)){v=edge.w;w=edge.v}var vLabel=t.node(v),wLabel=t.node(w),tailLabel=vLabel,flip=false;if(vLabel.lim>wLabel.lim){tailLabel=wLabel;flip=true}var candidates=_.filter(g.edges(),function(edge){return flip===isDescendant(t,t.node(edge.v),tailLabel)&&flip!==isDescendant(t,t.node(edge.w),tailLabel)});return _.min(candidates,function(edge){return slack(g,edge)})}function exchangeEdges(t,g,e,f){var v=e.v,w=e.w;t.removeEdge(v,w);t.setEdge(f.v,f.w,{});initLowLimValues(t);initCutValues(t,g);updateRanks(t,g)}function updateRanks(t,g){var root=_.find(t.nodes(),function(v){return!g.node(v).parent}),vs=preorder(t,root);vs=vs.slice(1);_.each(vs,function(v){var parent=t.node(v).parent,edge=g.edge(v,parent),flipped=false;if(!edge){edge=g.edge(parent,v);flipped=true}g.node(v).rank=g.node(parent).rank+(flipped?edge.minlen:-edge.minlen)})}function isTreeEdge(tree,u,v){return tree.hasEdge(u,v)}function isDescendant(tree,vLabel,rootLabel){return rootLabel.low<=vLabel.lim&&vLabel.lim<=rootLabel.lim}},{"../graphlib":33,"../lodash":36,"../util":55,"./feasible-tree":51,"./util":54}],54:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports={longestPath:longestPath,slack:slack};function longestPath(g){var visited={};function dfs(v){var label=g.node(v);if(_.has(visited,v)){return label.rank}visited[v]=true;var rank=_.min(_.map(g.outEdges(v),function(e){return dfs(e.w)-g.edge(e).minlen}));if(rank===Number.POSITIVE_INFINITY){rank=0}return label.rank=rank}_.each(g.sources(),dfs)}function slack(g,e){return g.node(e.w).rank-g.node(e.v).rank-g.edge(e).minlen}},{"../lodash":36}],55:[function(require,module,exports){"use strict";var _=require("./lodash"),Graph=require("./graphlib").Graph;module.exports={addDummyNode:addDummyNode,simplify:simplify,asNonCompoundGraph:asNonCompoundGraph,successorWeights:successorWeights,predecessorWeights:predecessorWeights,intersectRect:intersectRect,buildLayerMatrix:buildLayerMatrix,normalizeRanks:normalizeRanks,removeEmptyRanks:removeEmptyRanks,addBorderNode:addBorderNode,maxRank:maxRank,partition:partition,time:time,notime:notime};function addDummyNode(g,type,attrs,name){var v;do{v=_.uniqueId(name)}while(g.hasNode(v));attrs.dummy=type;g.setNode(v,attrs);return v}function simplify(g){var simplified=(new Graph).setGraph(g.graph());_.each(g.nodes(),function(v){simplified.setNode(v,g.node(v))});_.each(g.edges(),function(e){var simpleLabel=simplified.edge(e.v,e.w)||{weight:0,minlen:1},label=g.edge(e);simplified.setEdge(e.v,e.w,{weight:simpleLabel.weight+label.weight,minlen:Math.max(simpleLabel.minlen,label.minlen)})});return simplified}function asNonCompoundGraph(g){var simplified=new Graph({multigraph:g.isMultigraph()}).setGraph(g.graph());_.each(g.nodes(),function(v){if(!g.children(v).length){simplified.setNode(v,g.node(v))}});_.each(g.edges(),function(e){simplified.setEdge(e,g.edge(e))});return simplified}function successorWeights(g){var weightMap=_.map(g.nodes(),function(v){var 
sucs={};_.each(g.outEdges(v),function(e){sucs[e.w]=(sucs[e.w]||0)+g.edge(e).weight});return sucs});return _.zipObject(g.nodes(),weightMap)}function predecessorWeights(g){var weightMap=_.map(g.nodes(),function(v){var preds={};_.each(g.inEdges(v),function(e){preds[e.v]=(preds[e.v]||0)+g.edge(e).weight});return preds});return _.zipObject(g.nodes(),weightMap)}function intersectRect(rect,point){var x=rect.x;var y=rect.y;var dx=point.x-x;var dy=point.y-y;var w=rect.width/2;var h=rect.height/2;if(!dx&&!dy){throw new Error("Not possible to find intersection inside of the rectangle")}var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=w*dy/dx}return{x:x+sx,y:y+sy}}function buildLayerMatrix(g){var layering=_.map(_.range(maxRank(g)+1),function(){return[]});_.each(g.nodes(),function(v){var node=g.node(v),rank=node.rank;if(!_.isUndefined(rank)){layering[rank][node.order]=v}});return layering}function normalizeRanks(g){var min=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"rank")){node.rank-=min}})}function removeEmptyRanks(g){var offset=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));var layers=[];_.each(g.nodes(),function(v){var rank=g.node(v).rank-offset;if(!_.has(layers,rank)){layers[rank]=[]}layers[rank].push(v)});var delta=0,nodeRankFactor=g.graph().nodeRankFactor;_.each(layers,function(vs,i){if(_.isUndefined(vs)&&i%nodeRankFactor!==0){--delta}else if(delta){_.each(vs,function(v){g.node(v).rank+=delta})}})}function addBorderNode(g,prefix,rank,order){var node={width:0,height:0};if(arguments.length>=4){node.rank=rank;node.order=order}return addDummyNode(g,"border",node,prefix)}function maxRank(g){return _.max(_.map(g.nodes(),function(v){var rank=g.node(v).rank;if(!_.isUndefined(rank)){return rank}}))}function partition(collection,fn){var result={lhs:[],rhs:[]};_.each(collection,function(value){if(fn(value)){result.lhs.push(value)}else{result.rhs.push(value)}});return result}function time(name,fn){var start=_.now();try{return fn()}finally{console.log(name+" time: "+(_.now()-start)+"ms")}}function notime(name,fn){return fn()}},{"./graphlib":33,"./lodash":36}],56:[function(require,module,exports){module.exports="0.7.1"},{}],57:[function(require,module,exports){var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":73,"./lib/alg":64,"./lib/json":74}],58:[function(require,module,exports){var _=require("../lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{"../lodash":75}],59:[function(require,module,exports){var _=require("../lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{"../lodash":75}],60:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("../lodash");module.exports=dijkstraAll;function 
dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"../lodash":75,"./dijkstra":61}],61:[function(require,module,exports){var _=require("../lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. "+"Bad edge: "+edge+" Weight: "+weight)}if(distance<wEntry.distance){wEntry.distance=distance;wEntry.predecessor=v;pq.decrease(w,distance)}};g.nodes().forEach(function(v){var distance=v===source?0:Number.POSITIVE_INFINITY;results[v]={distance:distance};pq.add(v,distance)});while(pq.size()>0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":71,"../lodash":75}],62:[function(require,module,exports){var _=require("../lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"../lodash":75,"./tarjan":69}],63:[function(require,module,exports){var _=require("../lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance<ij.distance){ij.distance=altDistance;ij.predecessor=kj.predecessor}})})});return results}},{"../lodash":75}],64:[function(require,module,exports){module.exports={components:require("./components"),dijkstra:require("./dijkstra"),dijkstraAll:require("./dijkstra-all"),findCycles:require("./find-cycles"),floydWarshall:require("./floyd-warshall"),isAcyclic:require("./is-acyclic"),postorder:require("./postorder"),preorder:require("./preorder"),prim:require("./prim"),tarjan:require("./tarjan"),topsort:require("./topsort")}},{"./components":58,"./dijkstra":61,"./dijkstra-all":60,"./find-cycles":62,"./floyd-warshall":63,"./is-acyclic":65,"./postorder":66,"./preorder":67,"./prim":68,"./tarjan":69,"./topsort":70}],65:[function(require,module,exports){var topsort=require("./topsort");module.exports=isAcyclic;function isAcyclic(g){try{topsort(g)}catch(e){if(e instanceof topsort.CycleException){return false}throw e}return true}},{"./topsort":70}],66:[function(require,module,exports){var dfs=require("./dfs");module.exports=postorder;function postorder(g,vs){return dfs(g,vs,"post")}},{"./dfs":59}],67:[function(require,module,exports){var dfs=require("./dfs");module.exports=preorder;function 
preorder(g,vs){return dfs(g,vs,"pre")}},{"./dfs":59}],68:[function(require,module,exports){var _=require("../lodash"),Graph=require("../graph"),PriorityQueue=require("../data/priority-queue");module.exports=prim;function prim(g,weightFunc){var result=new Graph,parents={},pq=new PriorityQueue,v;function updateNeighbors(edge){var w=edge.v===v?edge.w:edge.v,pri=pq.priority(w);if(pri!==undefined){var edgeWeight=weightFunc(edge);if(edgeWeight<pri){parents[w]=v;pq.decrease(w,edgeWeight)}}}if(g.nodeCount()===0){return result}_.each(g.nodes(),function(v){pq.add(v,Number.POSITIVE_INFINITY);result.setNode(v)});pq.decrease(g.nodes()[0],0);var init=false;while(pq.size()>0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":71,"../graph":72,"../lodash":75}],69:[function(require,module,exports){var _=require("../lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{"../lodash":75}],70:[function(require,module,exports){var _=require("../lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{"../lodash":75}],71:[function(require,module,exports){var _=require("../lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. 
"+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l<arr.length){largest=arr[l].priority<arr[largest].priority?l:largest;if(r<arr.length){largest=arr[r].priority<arr[largest].priority?r:largest}if(largest!==i){this._swap(i,largest);this._heapify(largest)}}};PriorityQueue.prototype._decrease=function(index){var arr=this._arr;var priority=arr[index].priority;var parent;while(index!==0){parent=index>>1;if(arr[parent].priority<priority){break}this._swap(index,parent);index=parent}};PriorityQueue.prototype._swap=function(i,j){var arr=this._arr;var keyIndices=this._keyIndices;var origArrI=arr[i];var origArrJ=arr[j];arr[i]=origArrJ;arr[j]=origArrI;keyIndices[origArrJ.key]=i;keyIndices[origArrI.key]=j}},{"../lodash":75}],72:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports=Graph;var DEFAULT_EDGE_NAME="\x00",GRAPH_NODE="\x00",EDGE_KEY_DELIM="";function Graph(opts){this._isDirected=_.has(opts,"directed")?opts.directed:true;this._isMultigraph=_.has(opts,"multigraph")?opts.multigraph:false;this._isCompound=_.has(opts,"compound")?opts.compound:false;this._label=undefined;this._defaultNodeLabelFn=_.constant(undefined);this._defaultEdgeLabelFn=_.constant(undefined);this._nodes={};if(this._isCompound){this._parent={};this._children={};this._children[GRAPH_NODE]={}}this._in={};this._preds={};this._out={};this._sucs={};this._edgeObjs={};this._edgeLabels={}}Graph.prototype._nodeCount=0;Graph.prototype._edgeCount=0;Graph.prototype.isDirected=function(){return this._isDirected};Graph.prototype.isMultigraph=function(){return this._isMultigraph};Graph.prototype.isCompound=function(){return this._isCompound};Graph.prototype.setGraph=function(label){this._label=label;return this};Graph.prototype.graph=function(){return this._label};Graph.prototype.setDefaultNodeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultNodeLabelFn=newDefault;return this};Graph.prototype.nodeCount=function(){return this._nodeCount};Graph.prototype.nodes=function(){return _.keys(this._nodes)};Graph.prototype.sources=function(){return _.filter(this.nodes(),function(v){return _.isEmpty(this._in[v])},this)};Graph.prototype.sinks=function(){return _.filter(this.nodes(),function(v){return _.isEmpty(this._out[v])},this)};Graph.prototype.setNodes=function(vs,value){var args=arguments;_.each(vs,function(v){if(args.length>1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete 
this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return _.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(){var v,w,name,value,valueSpecified=false;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arguments[0];w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(!_.isUndefined(name)){name=""+name}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete 
this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return _.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{"./lodash":75}],73:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":72,"./version":76}],74:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":72,"./lodash":75}],75:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],76:[function(require,module,exports){module.exports="1.0.1"},{}],77:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f \ufeff"+"\n\r\u2028\u2029"+" ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var 
contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index<length){if(array[index]===value){return index}}return-1}function cacheIndexOf(cache,value){var type=typeof value;cache=cache.cache;if(type=="boolean"||value==null){return cache[value]?0:-1}if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value;cache=(cache=cache[type])&&cache[key];return type=="object"?cache&&baseIndexOf(cache,value)>-1?0:-1:cache?0:-1}function cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++index<length){var value=ac[index],other=bc[index];if(value!==other){if(value>other||typeof value=="undefined"){return 1}if(value<other||typeof other=="undefined"){return-1}}}return a.index-b.index}function createCache(array){var index=-1,length=array.length,first=array[0],mid=array[length/2|0],last=array[length-1];if(first&&typeof first=="object"&&mid&&typeof mid=="object"&&last&&typeof last=="object"){return false}var cache=getObject();cache["false"]=cache["null"]=cache["true"]=cache["undefined"]=false;var result=getObject();result.array=array;result.cache=cache;result.push=cachePush;while(++index<length){result.push(array[index])}return result}function escapeStringChar(match){return"\\"+stringEscapes[match]}function getArray(){return arrayPool.pop()||[]}function getObject(){return objectPool.pop()||{array:null,cache:null,criteria:null,"false":false,index:0,"null":false,number:null,object:null,push:null,string:null,"true":false,undefined:false,value:null}}function 
releaseArray(array){array.length=0;if(arrayPool.length<maxPoolSize){arrayPool.push(array)}}function releaseObject(object){var cache=object.cache;if(cache){releaseObject(cache)}object.array=object.cache=object.criteria=object.object=object.number=object.string=object.value=null;if(objectPool.length<maxPoolSize){objectPool.push(object)}}function slice(array,start,end){start||(start=0);if(typeof end=="undefined"){end=array?array.length:0}var index=-1,length=end-start||0,result=Array(length<0?0:length);while(++index<length){result[index]=array[start+index]}return result}function runInContext(context){context=context?_.defaults(root.Object(),context,_.pick(root,contextProps)):root;var Array=context.Array,Boolean=context.Boolean,Date=context.Date,Function=context.Function,Math=context.Math,Number=context.Number,Object=context.Object,RegExp=context.RegExp,String=context.String,TypeError=context.TypeError;var arrayRef=[];var objectProto=Object.prototype;var oldDash=context._;var toString=objectProto.toString;var reNative=RegExp("^"+String(toString).replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/toString| for [^\]]+/g,".*?")+"$");var ceil=Math.ceil,clearTimeout=context.clearTimeout,floor=Math.floor,fnToString=Function.prototype.toString,getPrototypeOf=isNative(getPrototypeOf=Object.getPrototypeOf)&&getPrototypeOf,hasOwnProperty=objectProto.hasOwnProperty,push=arrayRef.push,setTimeout=context.setTimeout,splice=arrayRef.splice,unshift=arrayRef.unshift;var defineProperty=function(){try{var o={},func=isNative(func=Object.defineProperty)&&func,result=func(o,o,o)&&func}catch(e){}return result}();var nativeCreate=isNative(nativeCreate=Object.create)&&nativeCreate,nativeIsArray=isNative(nativeIsArray=Array.isArray)&&nativeIsArray,nativeIsFinite=context.isFinite,nativeIsNaN=context.isNaN,nativeKeys=isNative(nativeKeys=Object.keys)&&nativeKeys,nativeMax=Math.max,nativeMin=Math.min,nativeParseInt=context.parseInt,nativeRandom=Math.random;var ctorByClass={};ctorByClass[arrayClass]=Array;ctorByClass[boolClass]=Boolean;ctorByClass[dateClass]=Date;ctorByClass[funcClass]=Function;ctorByClass[objectClass]=Object;ctorByClass[numberClass]=Number;ctorByClass[regexpClass]=RegExp;ctorByClass[stringClass]=String;function lodash(value){return value&&typeof value=="object"&&!isArray(value)&&hasOwnProperty.call(value,"__wrapped__")?value:new lodashWrapper(value)}function lodashWrapper(value,chainAll){this.__chain__=!!chainAll;this.__wrapped__=value}lodashWrapper.prototype=lodash.prototype;var support=lodash.support={};support.funcDecomp=!isNative(context.WinRTError)&&reThis.test(runInContext);support.funcNames=typeof Function.name=="string";lodash.templateSettings={escape:/<%-([\s\S]+?)%>/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case 
dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return isObject(prototype)?nativeCreate(prototype):{}; -do{v=_.uniqueId(name)}while(g.hasNode(v));attrs.dummy=type;g.setNode(v,attrs);return v}function simplify(g){var simplified=(new Graph).setGraph(g.graph());_.each(g.nodes(),function(v){simplified.setNode(v,g.node(v))});_.each(g.edges(),function(e){var simpleLabel=simplified.edge(e.v,e.w)||{weight:0,minlen:1},label=g.edge(e);simplified.setEdge(e.v,e.w,{weight:simpleLabel.weight+label.weight,minlen:Math.max(simpleLabel.minlen,label.minlen)})});return simplified}function asNonCompoundGraph(g){var simplified=new Graph({multigraph:g.isMultigraph()}).setGraph(g.graph());_.each(g.nodes(),function(v){if(!g.children(v).length){simplified.setNode(v,g.node(v))}});_.each(g.edges(),function(e){simplified.setEdge(e,g.edge(e))});return simplified}function successorWeights(g){var weightMap=_.map(g.nodes(),function(v){var sucs={};_.each(g.outEdges(v),function(e){sucs[e.w]=(sucs[e.w]||0)+g.edge(e).weight});return sucs});return _.zipObject(g.nodes(),weightMap)}function predecessorWeights(g){var weightMap=_.map(g.nodes(),function(v){var preds={};_.each(g.inEdges(v),function(e){preds[e.v]=(preds[e.v]||0)+g.edge(e).weight});return preds});return _.zipObject(g.nodes(),weightMap)}function intersectRect(rect,point){var x=rect.x;var y=rect.y;var dx=point.x-x;var dy=point.y-y;var w=rect.width/2;var h=rect.height/2;if(!dx&&!dy){throw new Error("Not possible to find intersection inside of the rectangle")}var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=w*dy/dx}return{x:x+sx,y:y+sy}}function buildLayerMatrix(g){var layering=_.map(_.range(maxRank(g)+1),function(){return[]});_.each(g.nodes(),function(v){var node=g.node(v),rank=node.rank;if(!_.isUndefined(rank)){layering[rank][node.order]=v}});return layering}function normalizeRanks(g){var min=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"rank")){node.rank-=min}})}function removeEmptyRanks(g){var offset=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));var layers=[];_.each(g.nodes(),function(v){var rank=g.node(v).rank-offset;if(!_.has(layers,rank)){layers[rank]=[]}layers[rank].push(v)});var delta=0,nodeRankFactor=g.graph().nodeRankFactor;_.each(layers,function(vs,i){if(_.isUndefined(vs)&&i%nodeRankFactor!==0){--delta}else if(delta){_.each(vs,function(v){g.node(v).rank+=delta})}})}function addBorderNode(g,prefix,rank,order){var node={width:0,height:0};if(arguments.length>=4){node.rank=rank;node.order=order}return addDummyNode(g,"border",node,prefix)}function 
maxRank(g){return _.max(_.map(g.nodes(),function(v){var rank=g.node(v).rank;if(!_.isUndefined(rank)){return rank}}))}function partition(collection,fn){var result={lhs:[],rhs:[]};_.each(collection,function(value){if(fn(value)){result.lhs.push(value)}else{result.rhs.push(value)}});return result}function time(name,fn){var start=_.now();try{return fn()}finally{console.log(name+" time: "+(_.now()-start)+"ms")}}function notime(name,fn){return fn()}},{"./graphlib":33,"./lodash":36}],56:[function(require,module,exports){module.exports="0.7.1"},{}],57:[function(require,module,exports){var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":73,"./lib/alg":64,"./lib/json":74}],58:[function(require,module,exports){var _=require("../lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{"../lodash":75}],59:[function(require,module,exports){var _=require("../lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{"../lodash":75}],60:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("../lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"../lodash":75,"./dijkstra":61}],61:[function(require,module,exports){var _=require("../lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. 
"+"Bad edge: "+edge+" Weight: "+weight)}if(distance<wEntry.distance){wEntry.distance=distance;wEntry.predecessor=v;pq.decrease(w,distance)}};g.nodes().forEach(function(v){var distance=v===source?0:Number.POSITIVE_INFINITY;results[v]={distance:distance};pq.add(v,distance)});while(pq.size()>0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":71,"../lodash":75}],62:[function(require,module,exports){var _=require("../lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"../lodash":75,"./tarjan":69}],63:[function(require,module,exports){var _=require("../lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance<ij.distance){ij.distance=altDistance;ij.predecessor=kj.predecessor}})})});return results}},{"../lodash":75}],64:[function(require,module,exports){module.exports={components:require("./components"),dijkstra:require("./dijkstra"),dijkstraAll:require("./dijkstra-all"),findCycles:require("./find-cycles"),floydWarshall:require("./floyd-warshall"),isAcyclic:require("./is-acyclic"),postorder:require("./postorder"),preorder:require("./preorder"),prim:require("./prim"),tarjan:require("./tarjan"),topsort:require("./topsort")}},{"./components":58,"./dijkstra":61,"./dijkstra-all":60,"./find-cycles":62,"./floyd-warshall":63,"./is-acyclic":65,"./postorder":66,"./preorder":67,"./prim":68,"./tarjan":69,"./topsort":70}],65:[function(require,module,exports){var topsort=require("./topsort");module.exports=isAcyclic;function isAcyclic(g){try{topsort(g)}catch(e){if(e instanceof topsort.CycleException){return false}throw e}return true}},{"./topsort":70}],66:[function(require,module,exports){var dfs=require("./dfs");module.exports=postorder;function postorder(g,vs){return dfs(g,vs,"post")}},{"./dfs":59}],67:[function(require,module,exports){var dfs=require("./dfs");module.exports=preorder;function preorder(g,vs){return dfs(g,vs,"pre")}},{"./dfs":59}],68:[function(require,module,exports){var _=require("../lodash"),Graph=require("../graph"),PriorityQueue=require("../data/priority-queue");module.exports=prim;function prim(g,weightFunc){var result=new Graph,parents={},pq=new PriorityQueue,v;function updateNeighbors(edge){var w=edge.v===v?edge.w:edge.v,pri=pq.priority(w);if(pri!==undefined){var edgeWeight=weightFunc(edge);if(edgeWeight<pri){parents[w]=v;pq.decrease(w,edgeWeight)}}}if(g.nodeCount()===0){return result}_.each(g.nodes(),function(v){pq.add(v,Number.POSITIVE_INFINITY);result.setNode(v)});pq.decrease(g.nodes()[0],0);var init=false;while(pq.size()>0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not 
connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":71,"../graph":72,"../lodash":75}],69:[function(require,module,exports){var _=require("../lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{"../lodash":75}],70:[function(require,module,exports){var _=require("../lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{"../lodash":75}],71:[function(require,module,exports){var _=require("../lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. 
"+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l<arr.length){largest=arr[l].priority<arr[largest].priority?l:largest;if(r<arr.length){largest=arr[r].priority<arr[largest].priority?r:largest}if(largest!==i){this._swap(i,largest);this._heapify(largest)}}};PriorityQueue.prototype._decrease=function(index){var arr=this._arr;var priority=arr[index].priority;var parent;while(index!==0){parent=index>>1;if(arr[parent].priority<priority){break}this._swap(index,parent);index=parent}};PriorityQueue.prototype._swap=function(i,j){var arr=this._arr;var keyIndices=this._keyIndices;var origArrI=arr[i];var origArrJ=arr[j];arr[i]=origArrJ;arr[j]=origArrI;keyIndices[origArrJ.key]=i;keyIndices[origArrI.key]=j}},{"../lodash":75}],72:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports=Graph;var DEFAULT_EDGE_NAME="\x00",GRAPH_NODE="\x00",EDGE_KEY_DELIM="";function Graph(opts){this._isDirected=_.has(opts,"directed")?opts.directed:true;this._isMultigraph=_.has(opts,"multigraph")?opts.multigraph:false;this._isCompound=_.has(opts,"compound")?opts.compound:false;this._label=undefined;this._defaultNodeLabelFn=_.constant(undefined);this._defaultEdgeLabelFn=_.constant(undefined);this._nodes={};if(this._isCompound){this._parent={};this._children={};this._children[GRAPH_NODE]={}}this._in={};this._preds={};this._out={};this._sucs={};this._edgeObjs={};this._edgeLabels={}}Graph.prototype._nodeCount=0;Graph.prototype._edgeCount=0;Graph.prototype.isDirected=function(){return this._isDirected};Graph.prototype.isMultigraph=function(){return this._isMultigraph};Graph.prototype.isCompound=function(){return this._isCompound};Graph.prototype.setGraph=function(label){this._label=label;return this};Graph.prototype.graph=function(){return this._label};Graph.prototype.setDefaultNodeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultNodeLabelFn=newDefault;return this};Graph.prototype.nodeCount=function(){return this._nodeCount};Graph.prototype.nodes=function(){return _.keys(this._nodes)};Graph.prototype.sources=function(){return _.filter(this.nodes(),function(v){return _.isEmpty(this._in[v])},this)};Graph.prototype.sinks=function(){return _.filter(this.nodes(),function(v){return _.isEmpty(this._out[v])},this)};Graph.prototype.setNodes=function(vs,value){var args=arguments;_.each(vs,function(v){if(args.length>1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete 
this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return _.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(){var v,w,name,value,valueSpecified=false;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arguments[0];w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(!_.isUndefined(name)){name=""+name}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete 
this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return _.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{"./lodash":75}],73:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":72,"./version":76}],74:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":72,"./lodash":75}],75:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],76:[function(require,module,exports){module.exports="1.0.1"},{}],77:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f \ufeff"+"\n\r\u2028\u2029"+" ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var 
contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index<length){if(array[index]===value){return index}}return-1}function cacheIndexOf(cache,value){var type=typeof value;cache=cache.cache;if(type=="boolean"||value==null){return cache[value]?0:-1}if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value;cache=(cache=cache[type])&&cache[key];return type=="object"?cache&&baseIndexOf(cache,value)>-1?0:-1:cache?0:-1}function cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++index<length){var value=ac[index],other=bc[index];if(value!==other){if(value>other||typeof value=="undefined"){return 1}if(value<other||typeof other=="undefined"){return-1}}}return a.index-b.index}function createCache(array){var index=-1,length=array.length,first=array[0],mid=array[length/2|0],last=array[length-1];if(first&&typeof first=="object"&&mid&&typeof mid=="object"&&last&&typeof last=="object"){return false}var cache=getObject();cache["false"]=cache["null"]=cache["true"]=cache["undefined"]=false;var result=getObject();result.array=array;result.cache=cache;result.push=cachePush;while(++index<length){result.push(array[index])}return result}function escapeStringChar(match){return"\\"+stringEscapes[match]}function getArray(){return arrayPool.pop()||[]}function getObject(){return objectPool.pop()||{array:null,cache:null,criteria:null,"false":false,index:0,"null":false,number:null,object:null,push:null,string:null,"true":false,undefined:false,value:null}}function 
releaseArray(array){array.length=0;if(arrayPool.length<maxPoolSize){arrayPool.push(array)}}function releaseObject(object){var cache=object.cache;if(cache){releaseObject(cache)}object.array=object.cache=object.criteria=object.object=object.number=object.string=object.value=null;if(objectPool.length<maxPoolSize){objectPool.push(object)}}function slice(array,start,end){start||(start=0);if(typeof end=="undefined"){end=array?array.length:0}var index=-1,length=end-start||0,result=Array(length<0?0:length);while(++index<length){result[index]=array[start+index]}return result}function runInContext(context){context=context?_.defaults(root.Object(),context,_.pick(root,contextProps)):root;var Array=context.Array,Boolean=context.Boolean,Date=context.Date,Function=context.Function,Math=context.Math,Number=context.Number,Object=context.Object,RegExp=context.RegExp,String=context.String,TypeError=context.TypeError;var arrayRef=[];var objectProto=Object.prototype;var oldDash=context._;var toString=objectProto.toString;var reNative=RegExp("^"+String(toString).replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/toString| for [^\]]+/g,".*?")+"$");var ceil=Math.ceil,clearTimeout=context.clearTimeout,floor=Math.floor,fnToString=Function.prototype.toString,getPrototypeOf=isNative(getPrototypeOf=Object.getPrototypeOf)&&getPrototypeOf,hasOwnProperty=objectProto.hasOwnProperty,push=arrayRef.push,setTimeout=context.setTimeout,splice=arrayRef.splice,unshift=arrayRef.unshift;var defineProperty=function(){try{var o={},func=isNative(func=Object.defineProperty)&&func,result=func(o,o,o)&&func}catch(e){}return result}();var nativeCreate=isNative(nativeCreate=Object.create)&&nativeCreate,nativeIsArray=isNative(nativeIsArray=Array.isArray)&&nativeIsArray,nativeIsFinite=context.isFinite,nativeIsNaN=context.isNaN,nativeKeys=isNative(nativeKeys=Object.keys)&&nativeKeys,nativeMax=Math.max,nativeMin=Math.min,nativeParseInt=context.parseInt,nativeRandom=Math.random;var ctorByClass={};ctorByClass[arrayClass]=Array;ctorByClass[boolClass]=Boolean;ctorByClass[dateClass]=Date;ctorByClass[funcClass]=Function;ctorByClass[objectClass]=Object;ctorByClass[numberClass]=Number;ctorByClass[regexpClass]=RegExp;ctorByClass[stringClass]=String;function lodash(value){return value&&typeof value=="object"&&!isArray(value)&&hasOwnProperty.call(value,"__wrapped__")?value:new lodashWrapper(value)}function lodashWrapper(value,chainAll){this.__chain__=!!chainAll;this.__wrapped__=value}lodashWrapper.prototype=lodash.prototype;var support=lodash.support={};support.funcDecomp=!isNative(context.WinRTError)&&reThis.test(runInContext);support.funcNames=typeof Function.name=="string";lodash.templateSettings={escape:/<%-([\s\S]+?)%>/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case 
dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return isObject(prototype)?nativeCreate(prototype):{}}if(!nativeCreate){baseCreate=function(){function Object(){}return function(prototype){if(isObject(prototype)){Object.prototype=prototype;var result=new Object;Object.prototype=null}return result||context.Object()}}()}function baseCreateCallback(func,thisArg,argCount){if(typeof func!="function"){return identity}if(typeof thisArg=="undefined"||!("prototype"in func)){return func}var bindData=func.__bindData__;if(typeof bindData=="undefined"){if(support.funcNames){bindData=!func.name}bindData=bindData||!support.funcDecomp;if(!bindData){var source=fnToString.call(func);if(!support.funcNames){bindData=!reFuncName.test(source)}if(!bindData){bindData=reThis.test(source);setBindData(func,bindData)}}}if(bindData===false||bindData!==true&&bindData[1]&1){return func}switch(argCount){case 1:return function(value){return func.call(thisArg,value)};case 2:return function(a,b){return func.call(thisArg,a,b)};case 3:return function(value,index,collection){return func.call(thisArg,value,index,collection)};case 4:return function(accumulator,value,index,collection){return func.call(thisArg,accumulator,value,index,collection)}}return bind(func,thisArg)}function baseCreateWrapper(bindData){var func=bindData[0],bitmask=bindData[1],partialArgs=bindData[2],partialRightArgs=bindData[3],thisArg=bindData[4],arity=bindData[5];var isBind=bitmask&1,isBindKey=bitmask&2,isCurry=bitmask&4,isCurryBound=bitmask&8,key=func;function bound(){var thisBinding=isBind?thisArg:this;if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(partialRightArgs||isCurry){args||(args=slice(arguments));if(partialRightArgs){push.apply(args,partialRightArgs)}if(isCurry&&args.length<arity){bitmask|=16&~32;return baseCreateWrapper([func,isCurryBound?bitmask:bitmask&~3,args,null,thisArg,arity])}}args||(args=arguments);if(isBindKey){func=thisBinding[key]}if(this instanceof bound){thisBinding=baseCreate(func.prototype);var result=func.apply(thisBinding,args);return isObject(result)?result:thisBinding}return func.apply(thisBinding,args)}setBindData(bound,bindData);return bound}function baseDifference(array,values){var index=-1,indexOf=getIndexOf(),length=array?array.length:0,isLarge=length>=largeArraySize&&indexOf===baseIndexOf,result=[]; - -if(isLarge){var cache=createCache(values);if(cache){indexOf=cacheIndexOf;values=cache}else{isLarge=false}}while(++index<length){var value=array[index];if(indexOf(values,value)<0){result.push(value)}}if(isLarge){releaseObject(values)}return result}function baseFlatten(array,isShallow,isStrict,fromIndex){var 
index=(fromIndex||0)-1,length=array?array.length:0,result=[];while(++index<length){var value=array[index];if(value&&typeof value=="object"&&typeof value.length=="number"&&(isArray(value)||isArguments(value))){if(!isShallow){value=baseFlatten(value,isShallow,isStrict)}var valIndex=-1,valLength=value.length,resIndex=result.length;result.length+=valLength;while(++valIndex<valLength){result[resIndex++]=value[valIndex]}}else if(!isStrict){result.push(value)}}return result}function baseIsEqual(a,b,callback,isWhere,stackA,stackB){if(callback){var result=callback(a,b);if(typeof result!="undefined"){return!!result}}if(a===b){return a!==0||1/a==1/b}var type=typeof a,otherType=typeof b;if(a===a&&!(a&&objectTypes[type])&&!(b&&objectTypes[otherType])){return false}if(a==null||b==null){return a===b}var className=toString.call(a),otherClass=toString.call(b);if(className==argsClass){className=objectClass}if(otherClass==argsClass){otherClass=objectClass}if(className!=otherClass){return false}switch(className){case boolClass:case dateClass:return+a==+b;case numberClass:return a!=+a?b!=+b:a==0?1/a==1/b:a==+b;case regexpClass:case stringClass:return a==String(b)}var isArr=className==arrayClass;if(!isArr){var aWrapped=hasOwnProperty.call(a,"__wrapped__"),bWrapped=hasOwnProperty.call(b,"__wrapped__");if(aWrapped||bWrapped){return baseIsEqual(aWrapped?a.__wrapped__:a,bWrapped?b.__wrapped__:b,callback,isWhere,stackA,stackB)}if(className!=objectClass){return false}var ctorA=a.constructor,ctorB=b.constructor;if(ctorA!=ctorB&&!(isFunction(ctorA)&&ctorA instanceof ctorA&&isFunction(ctorB)&&ctorB instanceof ctorB)&&("constructor"in a&&"constructor"in b)){return false}}var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==a){return stackB[length]==b}}var size=0;result=true;stackA.push(a);stackB.push(b);if(isArr){length=a.length;size=b.length;result=size==length;if(result||isWhere){while(size--){var index=length,value=b[size];if(isWhere){while(index--){if(result=baseIsEqual(a[index],value,callback,isWhere,stackA,stackB)){break}}}else if(!(result=baseIsEqual(a[size],value,callback,isWhere,stackA,stackB))){break}}}}else{forIn(b,function(value,key,b){if(hasOwnProperty.call(b,key)){size++;return result=hasOwnProperty.call(a,key)&&baseIsEqual(a[key],value,callback,isWhere,stackA,stackB)}});if(result&&!isWhere){forIn(a,function(value,key,a){if(hasOwnProperty.call(a,key)){return result=--size>-1}})}}stackA.pop();stackB.pop();if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseMerge(object,source,callback,stackA,stackB){(isArray(source)?forEach:forOwn)(source,function(source,key){var found,isArr,result=source,value=object[key];if(source&&((isArr=isArray(source))||isPlainObject(source))){var stackLength=stackA.length;while(stackLength--){if(found=stackA[stackLength]==source){value=stackB[stackLength];break}}if(!found){var isShallow;if(callback){result=callback(value,source);if(isShallow=typeof result!="undefined"){value=result}}if(!isShallow){value=isArr?isArray(value)?value:[]:isPlainObject(value)?value:{}}stackA.push(source);stackB.push(value);if(!isShallow){baseMerge(value,source,callback,stackA,stackB)}}}else{if(callback){result=callback(value,source);if(typeof result=="undefined"){result=source}}if(typeof result!="undefined"){value=result}}object[key]=value})}function baseRandom(min,max){return min+floor(nativeRandom()*(max-min+1))}function baseUniq(array,isSorted,callback){var 
index=-1,indexOf=getIndexOf(),length=array?array.length:0,result=[];var isLarge=!isSorted&&length>=largeArraySize&&indexOf===baseIndexOf,seen=callback||isLarge?getArray():result;if(isLarge){var cache=createCache(seen);indexOf=cacheIndexOf;seen=cache}while(++index<length){var value=array[index],computed=callback?callback(value,index,array):value;if(isSorted?!index||seen[seen.length-1]!==computed:indexOf(seen,computed)<0){if(callback||isLarge){seen.push(computed)}result.push(value)}}if(isLarge){releaseArray(seen.array);releaseObject(seen)}else if(callback){releaseArray(seen)}return result}function createAggregator(setter){return function(collection,callback,thisArg){var result={};callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){var value=collection[index];setter(result,value,callback(value,index,collection),collection)}}else{forOwn(collection,function(value,key,collection){setter(result,value,callback(value,key,collection),collection)})}return result}}function createWrapper(func,bitmask,partialArgs,partialRightArgs,thisArg,arity){var isBind=bitmask&1,isBindKey=bitmask&2,isCurry=bitmask&4,isCurryBound=bitmask&8,isPartial=bitmask&16,isPartialRight=bitmask&32;if(!isBindKey&&!isFunction(func)){throw new TypeError}if(isPartial&&!partialArgs.length){bitmask&=~16;isPartial=partialArgs=false}if(isPartialRight&&!partialRightArgs.length){bitmask&=~32;isPartialRight=partialRightArgs=false}var bindData=func&&func.__bindData__;if(bindData&&bindData!==true){bindData=slice(bindData);if(bindData[2]){bindData[2]=slice(bindData[2])}if(bindData[3]){bindData[3]=slice(bindData[3])}if(isBind&&!(bindData[1]&1)){bindData[4]=thisArg}if(!isBind&&bindData[1]&1){bitmask|=8}if(isCurry&&!(bindData[1]&4)){bindData[5]=arity}if(isPartial){push.apply(bindData[2]||(bindData[2]=[]),partialArgs)}if(isPartialRight){unshift.apply(bindData[3]||(bindData[3]=[]),partialRightArgs)}bindData[1]|=bitmask;return createWrapper.apply(null,bindData)}var creater=bitmask==1||bitmask===17?baseBind:baseCreateWrapper;return creater([func,bitmask,partialArgs,partialRightArgs,thisArg,arity])}function escapeHtmlChar(match){return htmlEscapes[match]}function getIndexOf(){var result=(result=lodash.indexOf)===indexOf?baseIndexOf:result;return result}function isNative(value){return typeof value=="function"&&reNative.test(value)}var setBindData=!defineProperty?noop:function(func,value){descriptor.value=value;defineProperty(func,"__bindData__",descriptor)};function shimIsPlainObject(value){var ctor,result;if(!(value&&toString.call(value)==objectClass)||(ctor=value.constructor,isFunction(ctor)&&!(ctor instanceof ctor))){return false}forIn(value,function(value,key){result=key});return typeof result=="undefined"||hasOwnProperty.call(value,result)}function unescapeHtmlChar(match){return htmlUnescapes[match]}function isArguments(value){return value&&typeof value=="object"&&typeof value.length=="number"&&toString.call(value)==argsClass||false}var isArray=nativeIsArray||function(value){return value&&typeof value=="object"&&typeof value.length=="number"&&toString.call(value)==arrayClass||false};var shimKeys=function(object){var index,iterable=object,result=[];if(!iterable)return result;if(!objectTypes[typeof object])return result;for(index in iterable){if(hasOwnProperty.call(iterable,index)){result.push(index)}}return result};var keys=!nativeKeys?shimKeys:function(object){if(!isObject(object)){return[]}return nativeKeys(object)};var 
htmlEscapes={"&":"&","<":"<",">":">",'"':""","'":"'"};var htmlUnescapes=invert(htmlEscapes);var reEscapedHtml=RegExp("("+keys(htmlUnescapes).join("|")+")","g"),reUnescapedHtml=RegExp("["+keys(htmlEscapes).join("")+"]","g");var assign=function(object,source,guard){var index,iterable=object,result=iterable;if(!iterable)return result;var args=arguments,argsIndex=0,argsLength=typeof guard=="number"?2:args.length;if(argsLength>3&&typeof args[argsLength-2]=="function"){var callback=baseCreateCallback(args[--argsLength-1],args[argsLength--],2)}else if(argsLength>2&&typeof args[argsLength-1]=="function"){callback=args[--argsLength]}while(++argsIndex<argsLength){iterable=args[argsIndex];if(iterable&&objectTypes[typeof iterable]){var ownIndex=-1,ownProps=objectTypes[typeof iterable]&&keys(iterable),length=ownProps?ownProps.length:0;while(++ownIndex<length){index=ownProps[ownIndex];result[index]=callback?callback(result[index],iterable[index]):iterable[index]}}}return result};function clone(value,isDeep,callback,thisArg){if(typeof isDeep!="boolean"&&isDeep!=null){thisArg=callback;callback=isDeep;isDeep=false}return baseClone(value,isDeep,typeof callback=="function"&&baseCreateCallback(callback,thisArg,1))}function cloneDeep(value,callback,thisArg){return baseClone(value,true,typeof callback=="function"&&baseCreateCallback(callback,thisArg,1))}function create(prototype,properties){var result=baseCreate(prototype);return properties?assign(result,properties):result}var defaults=function(object,source,guard){var index,iterable=object,result=iterable;if(!iterable)return result;var args=arguments,argsIndex=0,argsLength=typeof guard=="number"?2:args.length;while(++argsIndex<argsLength){iterable=args[argsIndex];if(iterable&&objectTypes[typeof iterable]){var ownIndex=-1,ownProps=objectTypes[typeof iterable]&&keys(iterable),length=ownProps?ownProps.length:0;while(++ownIndex<length){index=ownProps[ownIndex];if(typeof result[index]=="undefined")result[index]=iterable[index]}}}return result};function findKey(object,callback,thisArg){var result;callback=lodash.createCallback(callback,thisArg,3);forOwn(object,function(value,key,object){if(callback(value,key,object)){result=key;return false}});return result}function findLastKey(object,callback,thisArg){var result;callback=lodash.createCallback(callback,thisArg,3);forOwnRight(object,function(value,key,object){if(callback(value,key,object)){result=key;return false}});return result}var forIn=function(collection,callback,thisArg){var index,iterable=collection,result=iterable;if(!iterable)return result;if(!objectTypes[typeof iterable])return result;callback=callback&&typeof thisArg=="undefined"?callback:baseCreateCallback(callback,thisArg,3);for(index in iterable){if(callback(iterable[index],index,collection)===false)return result}return result};function forInRight(object,callback,thisArg){var pairs=[];forIn(object,function(value,key){pairs.push(key,value)});var length=pairs.length;callback=baseCreateCallback(callback,thisArg,3);while(length--){if(callback(pairs[length--],pairs[length],object)===false){break}}return object}var forOwn=function(collection,callback,thisArg){var index,iterable=collection,result=iterable;if(!iterable)return result;if(!objectTypes[typeof iterable])return result;callback=callback&&typeof thisArg=="undefined"?callback:baseCreateCallback(callback,thisArg,3);var ownIndex=-1,ownProps=objectTypes[typeof 
iterable]&&keys(iterable),length=ownProps?ownProps.length:0;while(++ownIndex<length){index=ownProps[ownIndex];if(callback(iterable[index],index,collection)===false)return result}return result};function forOwnRight(object,callback,thisArg){var props=keys(object),length=props.length;callback=baseCreateCallback(callback,thisArg,3);while(length--){var key=props[length];if(callback(object[key],key,object)===false){break}}return object}function functions(object){var result=[];forIn(object,function(value,key){if(isFunction(value)){result.push(key)}});return result.sort()}function has(object,key){return object?hasOwnProperty.call(object,key):false}function invert(object){var index=-1,props=keys(object),length=props.length,result={};while(++index<length){var key=props[index];result[object[key]]=key}return result}function isBoolean(value){return value===true||value===false||value&&typeof value=="object"&&toString.call(value)==boolClass||false}function isDate(value){return value&&typeof value=="object"&&toString.call(value)==dateClass||false}function isElement(value){return value&&value.nodeType===1||false}function isEmpty(value){var result=true;if(!value){return result}var className=toString.call(value),length=value.length;if(className==arrayClass||className==stringClass||className==argsClass||className==objectClass&&typeof length=="number"&&isFunction(value.splice)){return!length}forOwn(value,function(){return result=false});return result}function isEqual(a,b,callback,thisArg){return baseIsEqual(a,b,typeof callback=="function"&&baseCreateCallback(callback,thisArg,2))}function isFinite(value){return nativeIsFinite(value)&&!nativeIsNaN(parseFloat(value))}function isFunction(value){return typeof value=="function"}function isObject(value){return!!(value&&objectTypes[typeof value])}function isNaN(value){return isNumber(value)&&value!=+value}function isNull(value){return value===null}function isNumber(value){return typeof value=="number"||value&&typeof value=="object"&&toString.call(value)==numberClass||false}var isPlainObject=!getPrototypeOf?shimIsPlainObject:function(value){if(!(value&&toString.call(value)==objectClass)){return false}var valueOf=value.valueOf,objProto=isNative(valueOf)&&(objProto=getPrototypeOf(valueOf))&&getPrototypeOf(objProto);return objProto?value==objProto||getPrototypeOf(value)==objProto:shimIsPlainObject(value)};function isRegExp(value){return value&&typeof value=="object"&&toString.call(value)==regexpClass||false}function isString(value){return typeof value=="string"||value&&typeof value=="object"&&toString.call(value)==stringClass||false}function isUndefined(value){return typeof value=="undefined"}function mapValues(object,callback,thisArg){var result={};callback=lodash.createCallback(callback,thisArg,3);forOwn(object,function(value,key,object){result[key]=callback(value,key,object)});return result}function merge(object){var args=arguments,length=2;if(!isObject(object)){return object}if(typeof args[2]!="number"){length=args.length}if(length>3&&typeof args[length-2]=="function"){var callback=baseCreateCallback(args[--length-1],args[length--],2)}else if(length>2&&typeof args[length-1]=="function"){callback=args[--length]}var sources=slice(arguments,1,length),index=-1,stackA=getArray(),stackB=getArray();while(++index<length){baseMerge(object,sources[index],callback,stackA,stackB)}releaseArray(stackA);releaseArray(stackB);return object}function omit(object,callback,thisArg){var result={};if(typeof callback!="function"){var 
props=[];forIn(object,function(value,key){props.push(key)});props=baseDifference(props,baseFlatten(arguments,true,false,1));var index=-1,length=props.length;while(++index<length){var key=props[index];result[key]=object[key]}}else{callback=lodash.createCallback(callback,thisArg,3);forIn(object,function(value,key,object){if(!callback(value,key,object)){result[key]=value}})}return result}function pairs(object){var index=-1,props=keys(object),length=props.length,result=Array(length);while(++index<length){var key=props[index];result[index]=[key,object[key]]}return result}function pick(object,callback,thisArg){var result={};if(typeof callback!="function"){var index=-1,props=baseFlatten(arguments,true,false,1),length=isObject(object)?props.length:0;while(++index<length){var key=props[index];if(key in object){result[key]=object[key]}}}else{callback=lodash.createCallback(callback,thisArg,3);forIn(object,function(value,key,object){if(callback(value,key,object)){result[key]=value}})}return result}function transform(object,callback,accumulator,thisArg){var isArr=isArray(object);if(accumulator==null){if(isArr){accumulator=[]}else{var ctor=object&&object.constructor,proto=ctor&&ctor.prototype;accumulator=baseCreate(proto)}}if(callback){callback=lodash.createCallback(callback,thisArg,4);(isArr?forEach:forOwn)(object,function(value,index,object){return callback(accumulator,value,index,object)})}return accumulator}function values(object){var index=-1,props=keys(object),length=props.length,result=Array(length);while(++index<length){result[index]=object[props[index]]}return result}function at(collection){var args=arguments,index=-1,props=baseFlatten(args,true,false,1),length=args[2]&&args[2][args[1]]===collection?1:props.length,result=Array(length);while(++index<length){result[index]=collection[props[index]]}return result}function contains(collection,target,fromIndex){var index=-1,indexOf=getIndexOf(),length=collection?collection.length:0,result=false;fromIndex=(fromIndex<0?nativeMax(0,length+fromIndex):fromIndex)||0;if(isArray(collection)){result=indexOf(collection,target,fromIndex)>-1}else if(typeof length=="number"){result=(isString(collection)?collection.indexOf(target,fromIndex):indexOf(collection,target,fromIndex))>-1}else{forOwn(collection,function(value){if(++index>=fromIndex){return!(result=value===target)}})}return result}var countBy=createAggregator(function(result,value,key){hasOwnProperty.call(result,key)?result[key]++:result[key]=1});function every(collection,callback,thisArg){var result=true;callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){if(!(result=!!callback(collection[index],index,collection))){break}}}else{forOwn(collection,function(value,index,collection){return result=!!callback(value,index,collection)})}return result}function filter(collection,callback,thisArg){var result=[];callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){var value=collection[index];if(callback(value,index,collection)){result.push(value)}}}else{forOwn(collection,function(value,index,collection){if(callback(value,index,collection)){result.push(value)}})}return result}function find(collection,callback,thisArg){callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){var 
value=collection[index];if(callback(value,index,collection)){return value}}}else{var result;forOwn(collection,function(value,index,collection){if(callback(value,index,collection)){result=value;return false}});return result}}function findLast(collection,callback,thisArg){var result;callback=lodash.createCallback(callback,thisArg,3);forEachRight(collection,function(value,index,collection){if(callback(value,index,collection)){result=value;return false}});return result}function forEach(collection,callback,thisArg){var index=-1,length=collection?collection.length:0;callback=callback&&typeof thisArg=="undefined"?callback:baseCreateCallback(callback,thisArg,3);if(typeof length=="number"){while(++index<length){if(callback(collection[index],index,collection)===false){break}}}else{forOwn(collection,callback)}return collection}function forEachRight(collection,callback,thisArg){var length=collection?collection.length:0;callback=callback&&typeof thisArg=="undefined"?callback:baseCreateCallback(callback,thisArg,3);if(typeof length=="number"){while(length--){if(callback(collection[length],length,collection)===false){break}}}else{var props=keys(collection);length=props.length;forOwn(collection,function(value,key,collection){key=props?props[--length]:--length;return callback(collection[key],key,collection)})}return collection}var groupBy=createAggregator(function(result,value,key){(hasOwnProperty.call(result,key)?result[key]:result[key]=[]).push(value)});var indexBy=createAggregator(function(result,value,key){result[key]=value});function invoke(collection,methodName){var args=slice(arguments,2),index=-1,isFunc=typeof methodName=="function",length=collection?collection.length:0,result=Array(typeof length=="number"?length:0);forEach(collection,function(value){result[++index]=(isFunc?methodName:value[methodName]).apply(value,args)});return result}function map(collection,callback,thisArg){var index=-1,length=collection?collection.length:0;callback=lodash.createCallback(callback,thisArg,3);if(typeof length=="number"){var result=Array(length);while(++index<length){result[index]=callback(collection[index],index,collection)}}else{result=[];forOwn(collection,function(value,key,collection){result[++index]=callback(value,key,collection)})}return result}function max(collection,callback,thisArg){var computed=-Infinity,result=computed;if(typeof callback!="function"&&thisArg&&thisArg[callback]===collection){callback=null}if(callback==null&&isArray(collection)){var index=-1,length=collection.length;while(++index<length){var value=collection[index];if(value>result){result=value}}}else{callback=callback==null&&isString(collection)?charAtCallback:lodash.createCallback(callback,thisArg,3);forEach(collection,function(value,index,collection){var current=callback(value,index,collection);if(current>computed){computed=current;result=value}})}return result}function min(collection,callback,thisArg){var computed=Infinity,result=computed;if(typeof callback!="function"&&thisArg&&thisArg[callback]===collection){callback=null}if(callback==null&&isArray(collection)){var index=-1,length=collection.length;while(++index<length){var value=collection[index];if(value<result){result=value}}}else{callback=callback==null&&isString(collection)?charAtCallback:lodash.createCallback(callback,thisArg,3);forEach(collection,function(value,index,collection){var current=callback(value,index,collection);if(current<computed){computed=current;result=value}})}return result}var pluck=map;function 
reduce(collection,callback,accumulator,thisArg){if(!collection)return accumulator;var noaccum=arguments.length<3;callback=lodash.createCallback(callback,thisArg,4);var index=-1,length=collection.length;if(typeof length=="number"){if(noaccum){accumulator=collection[++index]}while(++index<length){accumulator=callback(accumulator,collection[index],index,collection)}}else{forOwn(collection,function(value,index,collection){accumulator=noaccum?(noaccum=false,value):callback(accumulator,value,index,collection)})}return accumulator}function reduceRight(collection,callback,accumulator,thisArg){var noaccum=arguments.length<3;callback=lodash.createCallback(callback,thisArg,4);forEachRight(collection,function(value,index,collection){accumulator=noaccum?(noaccum=false,value):callback(accumulator,value,index,collection)});return accumulator}function reject(collection,callback,thisArg){callback=lodash.createCallback(callback,thisArg,3);return filter(collection,function(value,index,collection){return!callback(value,index,collection)})}function sample(collection,n,guard){if(collection&&typeof collection.length!="number"){collection=values(collection)}if(n==null||guard){return collection?collection[baseRandom(0,collection.length-1)]:undefined}var result=shuffle(collection);result.length=nativeMin(nativeMax(0,n),result.length);return result}function shuffle(collection){var index=-1,length=collection?collection.length:0,result=Array(typeof length=="number"?length:0);forEach(collection,function(value){var rand=baseRandom(0,++index);result[index]=result[rand];result[rand]=value});return result}function size(collection){var length=collection?collection.length:0;return typeof length=="number"?length:keys(collection).length}function some(collection,callback,thisArg){var result;callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){if(result=callback(collection[index],index,collection)){break}}}else{forOwn(collection,function(value,index,collection){return!(result=callback(value,index,collection))})}return!!result}function sortBy(collection,callback,thisArg){var index=-1,isArr=isArray(callback),length=collection?collection.length:0,result=Array(typeof length=="number"?length:0);if(!isArr){callback=lodash.createCallback(callback,thisArg,3)}forEach(collection,function(value,key,collection){var object=result[++index]=getObject();if(isArr){object.criteria=map(callback,function(key){return value[key]})}else{(object.criteria=getArray())[0]=callback(value,key,collection)}object.index=index;object.value=value});length=result.length;result.sort(compareAscending);while(length--){var object=result[length];result[length]=object.value;if(!isArr){releaseArray(object.criteria)}releaseObject(object)}return result}function toArray(collection){if(collection&&typeof collection.length=="number"){return slice(collection)}return values(collection)}var where=filter;function compact(array){var index=-1,length=array?array.length:0,result=[];while(++index<length){var value=array[index];if(value){result.push(value)}}return result}function difference(array){return baseDifference(array,baseFlatten(arguments,true,true,1))}function findIndex(array,callback,thisArg){var index=-1,length=array?array.length:0;callback=lodash.createCallback(callback,thisArg,3);while(++index<length){if(callback(array[index],index,array)){return index}}return-1}function findLastIndex(array,callback,thisArg){var 
length=array?array.length:0;callback=lodash.createCallback(callback,thisArg,3);while(length--){if(callback(array[length],length,array)){return length}}return-1}function first(array,callback,thisArg){var n=0,length=array?array.length:0;if(typeof callback!="number"&&callback!=null){var index=-1;callback=lodash.createCallback(callback,thisArg,3);while(++index<length&&callback(array[index],index,array)){n++}}else{n=callback;if(n==null||thisArg){return array?array[0]:undefined}}return slice(array,0,nativeMin(nativeMax(0,n),length))}function flatten(array,isShallow,callback,thisArg){if(typeof isShallow!="boolean"&&isShallow!=null){thisArg=callback;callback=typeof isShallow!="function"&&thisArg&&thisArg[isShallow]===array?null:isShallow;isShallow=false}if(callback!=null){array=map(array,callback,thisArg)}return baseFlatten(array,isShallow)}function indexOf(array,value,fromIndex){if(typeof fromIndex=="number"){var length=array?array.length:0;fromIndex=fromIndex<0?nativeMax(0,length+fromIndex):fromIndex||0}else if(fromIndex){var index=sortedIndex(array,value);return array[index]===value?index:-1}return baseIndexOf(array,value,fromIndex)}function initial(array,callback,thisArg){var n=0,length=array?array.length:0;if(typeof callback!="number"&&callback!=null){var index=length;callback=lodash.createCallback(callback,thisArg,3);while(index--&&callback(array[index],index,array)){n++}}else{n=callback==null||thisArg?1:callback||n}return slice(array,0,nativeMin(nativeMax(0,length-n),length))}function intersection(){var args=[],argsIndex=-1,argsLength=arguments.length,caches=getArray(),indexOf=getIndexOf(),trustIndexOf=indexOf===baseIndexOf,seen=getArray();while(++argsIndex<argsLength){var value=arguments[argsIndex];if(isArray(value)||isArguments(value)){args.push(value);caches.push(trustIndexOf&&value.length>=largeArraySize&&createCache(argsIndex?args[argsIndex]:seen))}}var array=args[0],index=-1,length=array?array.length:0,result=[];outer:while(++index<length){var cache=caches[0];value=array[index];if((cache?cacheIndexOf(cache,value):indexOf(seen,value))<0){argsIndex=argsLength;(cache||seen).push(value);while(--argsIndex){cache=caches[argsIndex];if((cache?cacheIndexOf(cache,value):indexOf(args[argsIndex],value))<0){continue outer}}result.push(value)}}while(argsLength--){cache=caches[argsLength];if(cache){releaseObject(cache)}}releaseArray(caches);releaseArray(seen);return result}function last(array,callback,thisArg){var n=0,length=array?array.length:0;if(typeof callback!="number"&&callback!=null){var index=length;callback=lodash.createCallback(callback,thisArg,3);while(index--&&callback(array[index],index,array)){n++}}else{n=callback;if(n==null||thisArg){return array?array[length-1]:undefined}}return slice(array,nativeMax(0,length-n))}function lastIndexOf(array,value,fromIndex){var index=array?array.length:0;if(typeof fromIndex=="number"){index=(fromIndex<0?nativeMax(0,index+fromIndex):nativeMin(fromIndex,index-1))+1}while(index--){if(array[index]===value){return index}}return-1}function pull(array){var args=arguments,argsIndex=0,argsLength=args.length,length=array?array.length:0;while(++argsIndex<argsLength){var index=-1,value=args[argsIndex];while(++index<length){if(array[index]===value){splice.call(array,index--,1);length--}}}return array}function range(start,end,step){start=+start||0;step=typeof step=="number"?step:+step||1;if(end==null){end=start;start=0}var index=-1,length=nativeMax(0,ceil((end-start)/(step||1))),result=Array(length);while(++index<length){result[index]=start;start+=step}return 
result}function remove(array,callback,thisArg){var index=-1,length=array?array.length:0,result=[];callback=lodash.createCallback(callback,thisArg,3);while(++index<length){var value=array[index];if(callback(value,index,array)){result.push(value);splice.call(array,index--,1);length--}}return result}function rest(array,callback,thisArg){if(typeof callback!="number"&&callback!=null){var n=0,index=-1,length=array?array.length:0;callback=lodash.createCallback(callback,thisArg,3);while(++index<length&&callback(array[index],index,array)){n++}}else{n=callback==null||thisArg?1:nativeMax(0,callback)}return slice(array,n)}function sortedIndex(array,value,callback,thisArg){var low=0,high=array?array.length:low;callback=callback?lodash.createCallback(callback,thisArg,1):identity;value=callback(value);while(low<high){var mid=low+high>>>1;callback(array[mid])<value?low=mid+1:high=mid}return low}function union(){return baseUniq(baseFlatten(arguments,true,true))}function uniq(array,isSorted,callback,thisArg){if(typeof isSorted!="boolean"&&isSorted!=null){thisArg=callback;callback=typeof isSorted!="function"&&thisArg&&thisArg[isSorted]===array?null:isSorted;isSorted=false}if(callback!=null){callback=lodash.createCallback(callback,thisArg,3)}return baseUniq(array,isSorted,callback)}function without(array){return baseDifference(array,slice(arguments,1))}function xor(){var index=-1,length=arguments.length;while(++index<length){var array=arguments[index];if(isArray(array)||isArguments(array)){var result=result?baseUniq(baseDifference(result,array).concat(baseDifference(array,result))):array}}return result||[]}function zip(){var array=arguments.length>1?arguments:arguments[0],index=-1,length=array?max(pluck(array,"length")):0,result=Array(length<0?0:length);while(++index<length){result[index]=pluck(array,index)}return result}function zipObject(keys,values){var index=-1,length=keys?keys.length:0,result={};if(!values&&length&&!isArray(keys[0])){values=[]}while(++index<length){var key=keys[index];if(values){result[key]=values[index]}else if(key){result[key[0]]=key[1]}}return result}function after(n,func){if(!isFunction(func)){throw new TypeError}return function(){if(--n<1){return func.apply(this,arguments)}}}function bind(func,thisArg){return arguments.length>2?createWrapper(func,17,slice(arguments,2),null,thisArg):createWrapper(func,1,null,null,thisArg)}function bindAll(object){var funcs=arguments.length>1?baseFlatten(arguments,true,false,1):functions(object),index=-1,length=funcs.length;while(++index<length){var key=funcs[index];object[key]=createWrapper(object[key],1,null,null,object)}return object}function bindKey(object,key){return arguments.length>2?createWrapper(key,19,slice(arguments,2),null,object):createWrapper(key,3,null,null,object)}function compose(){var funcs=arguments,length=funcs.length;while(length--){if(!isFunction(funcs[length])){throw new TypeError}}return function(){var args=arguments,length=funcs.length;while(length--){args=[funcs[length].apply(this,args)]}return args[0]}}function curry(func,arity){arity=typeof arity=="number"?arity:+arity||func.length;return createWrapper(func,4,null,null,null,arity)}function debounce(func,wait,options){var args,maxTimeoutId,result,stamp,thisArg,timeoutId,trailingCall,lastCalled=0,maxWait=false,trailing=true;if(!isFunction(func)){throw new TypeError}wait=nativeMax(0,wait)||0;if(options===true){var leading=true;trailing=false}else if(isObject(options)){leading=options.leading;maxWait="maxWait"in 
options&&(nativeMax(wait,options.maxWait)||0);trailing="trailing"in options?options.trailing:trailing}var delayed=function(){var remaining=wait-(now()-stamp);if(remaining<=0){if(maxTimeoutId){clearTimeout(maxTimeoutId); - -}var isCalled=trailingCall;maxTimeoutId=timeoutId=trailingCall=undefined;if(isCalled){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}}else{timeoutId=setTimeout(delayed,remaining)}};var maxDelayed=function(){if(timeoutId){clearTimeout(timeoutId)}maxTimeoutId=timeoutId=trailingCall=undefined;if(trailing||maxWait!==wait){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}};return function(){args=arguments;stamp=now();thisArg=this;trailingCall=trailing&&(timeoutId||!leading);if(maxWait===false){var leadingCall=leading&&!timeoutId}else{if(!maxTimeoutId&&!leading){lastCalled=stamp}var remaining=maxWait-(stamp-lastCalled),isCalled=remaining<=0;if(isCalled){if(maxTimeoutId){maxTimeoutId=clearTimeout(maxTimeoutId)}lastCalled=stamp;result=func.apply(thisArg,args)}else if(!maxTimeoutId){maxTimeoutId=setTimeout(maxDelayed,remaining)}}if(isCalled&&timeoutId){timeoutId=clearTimeout(timeoutId)}else if(!timeoutId&&wait!==maxWait){timeoutId=setTimeout(delayed,wait)}if(leadingCall){isCalled=true;result=func.apply(thisArg,args)}if(isCalled&&!timeoutId&&!maxTimeoutId){args=thisArg=null}return result}}function defer(func){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,1);return setTimeout(function(){func.apply(undefined,args)},1)}function delay(func,wait){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,2);return setTimeout(function(){func.apply(undefined,args)},wait)}function memoize(func,resolver){if(!isFunction(func)){throw new TypeError}var memoized=function(){var cache=memoized.cache,key=resolver?resolver.apply(this,arguments):keyPrefix+arguments[0];return hasOwnProperty.call(cache,key)?cache[key]:cache[key]=func.apply(this,arguments)};memoized.cache={};return memoized}function once(func){var ran,result;if(!isFunction(func)){throw new TypeError}return function(){if(ran){return result}ran=true;result=func.apply(this,arguments);func=null;return result}}function partial(func){return createWrapper(func,16,slice(arguments,1))}function partialRight(func){return createWrapper(func,32,null,slice(arguments,1))}function throttle(func,wait,options){var leading=true,trailing=true;if(!isFunction(func)){throw new TypeError}if(options===false){leading=false}else if(isObject(options)){leading="leading"in options?options.leading:leading;trailing="trailing"in options?options.trailing:trailing}debounceOptions.leading=leading;debounceOptions.maxWait=wait;debounceOptions.trailing=trailing;return debounce(func,wait,debounceOptions)}function wrap(value,wrapper){return createWrapper(wrapper,16,[value])}function constant(value){return function(){return value}}function createCallback(func,thisArg,argCount){var type=typeof func;if(func==null||type=="function"){return baseCreateCallback(func,thisArg,argCount)}if(type!="object"){return property(func)}var props=keys(func),key=props[0],a=func[key];if(props.length==1&&a===a&&!isObject(a)){return function(object){var b=object[key];return a===b&&(a!==0||1/a==1/b)}}return function(object){var length=props.length,result=false;while(length--){if(!(result=baseIsEqual(object[props[length]],func[props[length]],null,true))){break}}return result}}function escape(string){return 
string==null?"":String(string).replace(reUnescapedHtml,escapeHtmlChar)}function identity(value){return value}function mixin(object,source,options){var chain=true,methodNames=source&&functions(source);if(!source||!options&&!methodNames.length){if(options==null){options=source}ctor=lodashWrapper;source=object;object=lodash;methodNames=functions(source)}if(options===false){chain=false}else if(isObject(options)&&"chain"in options){chain=options.chain}var ctor=object,isFunc=isFunction(ctor);forEach(methodNames,function(methodName){var func=object[methodName]=source[methodName];if(isFunc){ctor.prototype[methodName]=function(){var chainAll=this.__chain__,value=this.__wrapped__,args=[value];push.apply(args,arguments);var result=func.apply(object,args);if(chain||chainAll){if(value===result&&isObject(result)){return this}result=new ctor(result);result.__chain__=chainAll}return result}}})}function noConflict(){context._=oldDash;return this}function noop(){}var now=isNative(now=Date.now)&&now||function(){return(new Date).getTime()};var parseInt=nativeParseInt(whitespace+"08")==8?nativeParseInt:function(value,radix){return nativeParseInt(isString(value)?value.replace(reLeadingSpacesAndZeros,""):value,radix||0)};function property(key){return function(object){return object[key]}}function random(min,max,floating){var noMin=min==null,noMax=max==null;if(floating==null){if(typeof min=="boolean"&&noMax){floating=min;min=1}else if(!noMax&&typeof max=="boolean"){floating=max;noMax=true}}if(noMin&&noMax){max=1}min=+min||0;if(noMax){max=min;min=0}else{max=+max||0}if(floating||min%1||max%1){var rand=nativeRandom();return nativeMin(min+rand*(max-min+parseFloat("1e-"+((rand+"").length-1))),max)}return baseRandom(min,max)}function result(object,key){if(object){var value=object[key];return isFunction(value)?object[key]():value}}function template(text,data,options){var settings=lodash.templateSettings;text=String(text||"");options=defaults({},options,settings);var imports=defaults({},options.imports,settings.imports),importsKeys=keys(imports),importsValues=values(imports);var isEvaluating,index=0,interpolate=options.interpolate||reNoMatch,source="__p += '";var reDelimiters=RegExp((options.escape||reNoMatch).source+"|"+interpolate.source+"|"+(interpolate===reInterpolate?reEsTemplate:reNoMatch).source+"|"+(options.evaluate||reNoMatch).source+"|$","g");text.replace(reDelimiters,function(match,escapeValue,interpolateValue,esTemplateValue,evaluateValue,offset){interpolateValue||(interpolateValue=esTemplateValue);source+=text.slice(index,offset).replace(reUnescapedString,escapeStringChar);if(escapeValue){source+="' +\n__e("+escapeValue+") +\n'"}if(evaluateValue){isEvaluating=true;source+="';\n"+evaluateValue+";\n__p += '"}if(interpolateValue){source+="' +\n((__t = ("+interpolateValue+")) == null ? 
'' : __t) +\n'"}index=offset+match.length;return match});source+="';\n";var variable=options.variable,hasVariable=variable;if(!hasVariable){variable="obj";source="with ("+variable+") {\n"+source+"\n}\n"}source=(isEvaluating?source.replace(reEmptyStringLeading,""):source).replace(reEmptyStringMiddle,"$1").replace(reEmptyStringTrailing,"$1;");source="function("+variable+") {\n"+(hasVariable?"":variable+" || ("+variable+" = {});\n")+"var __t, __p = '', __e = _.escape"+(isEvaluating?", __j = Array.prototype.join;\n"+"function print() { __p += __j.call(arguments, '') }\n":";\n")+source+"return __p\n}";var sourceURL="\n/*\n//# sourceURL="+(options.sourceURL||"/lodash/template/source["+templateCounter++ +"]")+"\n*/";try{var result=Function(importsKeys,"return "+source+sourceURL).apply(undefined,importsValues)}catch(e){e.source=source;throw e}if(data){return result(data)}result.source=source;return result}function times(n,callback,thisArg){n=(n=+n)>-1?n:0;var index=-1,result=Array(n);callback=baseCreateCallback(callback,thisArg,1);while(++index<n){result[index]=callback(index)}return result}function unescape(string){return string==null?"":String(string).replace(reEscapedHtml,unescapeHtmlChar)}function uniqueId(prefix){var id=++idCounter;return String(prefix==null?"":prefix)+id}function chain(value){value=new lodashWrapper(value);value.__chain__=true;return value}function tap(value,interceptor){interceptor(value);return value}function wrapperChain(){this.__chain__=true;return this}function wrapperToString(){return String(this.__wrapped__)}function wrapperValueOf(){return this.__wrapped__}lodash.after=after;lodash.assign=assign;lodash.at=at;lodash.bind=bind;lodash.bindAll=bindAll;lodash.bindKey=bindKey;lodash.chain=chain;lodash.compact=compact;lodash.compose=compose;lodash.constant=constant;lodash.countBy=countBy;lodash.create=create;lodash.createCallback=createCallback;lodash.curry=curry;lodash.debounce=debounce;lodash.defaults=defaults;lodash.defer=defer;lodash.delay=delay;lodash.difference=difference;lodash.filter=filter;lodash.flatten=flatten;lodash.forEach=forEach;lodash.forEachRight=forEachRight;lodash.forIn=forIn;lodash.forInRight=forInRight;lodash.forOwn=forOwn;lodash.forOwnRight=forOwnRight;lodash.functions=functions;lodash.groupBy=groupBy;lodash.indexBy=indexBy;lodash.initial=initial;lodash.intersection=intersection;lodash.invert=invert;lodash.invoke=invoke;lodash.keys=keys;lodash.map=map;lodash.mapValues=mapValues;lodash.max=max;lodash.memoize=memoize;lodash.merge=merge;lodash.min=min;lodash.omit=omit;lodash.once=once;lodash.pairs=pairs;lodash.partial=partial;lodash.partialRight=partialRight;lodash.pick=pick;lodash.pluck=pluck;lodash.property=property;lodash.pull=pull;lodash.range=range;lodash.reject=reject;lodash.remove=remove;lodash.rest=rest;lodash.shuffle=shuffle;lodash.sortBy=sortBy;lodash.tap=tap;lodash.throttle=throttle;lodash.times=times;lodash.toArray=toArray;lodash.transform=transform;lodash.union=union;lodash.uniq=uniq;lodash.values=values;lodash.where=where;lodash.without=without;lodash.wrap=wrap;lodash.xor=xor;lodash.zip=zip;lodash.zipObject=zipObject;lodash.collect=map;lodash.drop=rest;lodash.each=forEach;lodash.eachRight=forEachRight;lodash.extend=assign;lodash.methods=functions;lodash.object=zipObject;lodash.select=filter;lodash.tail=rest;lodash.unique=uniq;lodash.unzip=zip;mixin(lodash);lodash.clone=clone;lodash.cloneDeep=cloneDeep;lodash.contains=contains;lodash.escape=escape;lodash.every=every;lodash.find=find;lodash.findIndex=findIndex;lodash.findKey=findKey;lodash.find
Last=findLast;lodash.findLastIndex=findLastIndex;lodash.findLastKey=findLastKey;lodash.has=has;lodash.identity=identity;lodash.indexOf=indexOf;lodash.isArguments=isArguments;lodash.isArray=isArray;lodash.isBoolean=isBoolean;lodash.isDate=isDate;lodash.isElement=isElement;lodash.isEmpty=isEmpty;lodash.isEqual=isEqual;lodash.isFinite=isFinite;lodash.isFunction=isFunction;lodash.isNaN=isNaN;lodash.isNull=isNull;lodash.isNumber=isNumber;lodash.isObject=isObject;lodash.isPlainObject=isPlainObject;lodash.isRegExp=isRegExp;lodash.isString=isString;lodash.isUndefined=isUndefined;lodash.lastIndexOf=lastIndexOf;lodash.mixin=mixin;lodash.noConflict=noConflict;lodash.noop=noop;lodash.now=now;lodash.parseInt=parseInt;lodash.random=random;lodash.reduce=reduce;lodash.reduceRight=reduceRight;lodash.result=result;lodash.runInContext=runInContext;lodash.size=size;lodash.some=some;lodash.sortedIndex=sortedIndex;lodash.template=template;lodash.unescape=unescape;lodash.uniqueId=uniqueId;lodash.all=every;lodash.any=some;lodash.detect=find;lodash.findWhere=find;lodash.foldl=reduce;lodash.foldr=reduceRight;lodash.include=contains;lodash.inject=reduce;mixin(function(){var source={};forOwn(lodash,function(func,methodName){if(!lodash.prototype[methodName]){source[methodName]=func}});return source}(),false);lodash.first=first;lodash.last=last;lodash.sample=sample;lodash.take=first;lodash.head=first;forOwn(lodash,function(func,methodName){var callbackable=methodName!=="sample";if(!lodash.prototype[methodName]){lodash.prototype[methodName]=function(n,guard){var chainAll=this.__chain__,result=func(this.__wrapped__,n,guard);return!chainAll&&(n==null||guard&&!(callbackable&&typeof n=="function"))?result:new lodashWrapper(result,chainAll)}}});lodash.VERSION="2.4.1";lodash.prototype.chain=wrapperChain;lodash.prototype.toString=wrapperToString;lodash.prototype.value=wrapperValueOf;lodash.prototype.valueOf=wrapperValueOf;forEach(["join","pop","shift"],function(methodName){var func=arrayRef[methodName];lodash.prototype[methodName]=function(){var chainAll=this.__chain__,result=func.apply(this.__wrapped__,arguments);return chainAll?new lodashWrapper(result,chainAll):result}});forEach(["push","reverse","sort","unshift"],function(methodName){var func=arrayRef[methodName];lodash.prototype[methodName]=function(){func.apply(this.__wrapped__,arguments);return this}});forEach(["concat","slice","splice"],function(methodName){var func=arrayRef[methodName];lodash.prototype[methodName]=function(){return new lodashWrapper(func.apply(this.__wrapped__,arguments),this.__chain__)}});return lodash}var _=runInContext();if(typeof define=="function"&&typeof define.amd=="object"&&define.amd){root._=_;define(function(){return _})}else if(freeExports&&freeModule){if(moduleExports){(freeModule.exports=_)._=_}else{freeExports._=_}}else{root._=_}}).call(this)}).call(this,typeof global!=="undefined"?global:typeof self!=="undefined"?self:typeof window!=="undefined"?window:{})},{}]},{},[1])(1)}); \ No newline at end of file +}if(!nativeCreate){baseCreate=function(){function Object(){}return function(prototype){if(isObject(prototype)){Object.prototype=prototype;var result=new Object;Object.prototype=null}return result||context.Object()}}()}function baseCreateCallback(func,thisArg,argCount){if(typeof func!="function"){return identity}if(typeof thisArg=="undefined"||!("prototype"in func)){return func}var bindData=func.__bindData__;if(typeof bindData=="undefined"){if(support.funcNames){bindData=!func.name}bindData=bindData||!support.funcDecomp;if(!bindData){var 
source=fnToString.call(func);if(!support.funcNames){bindData=!reFuncName.test(source)}if(!bindData){bindData=reThis.test(source);setBindData(func,bindData)}}}if(bindData===false||bindData!==true&&bindData[1]&1){return func}switch(argCount){case 1:return function(value){return func.call(thisArg,value)};case 2:return function(a,b){return func.call(thisArg,a,b)};case 3:return function(value,index,collection){return func.call(thisArg,value,index,collection)};case 4:return function(accumulator,value,index,collection){return func.call(thisArg,accumulator,value,index,collection)}}return bind(func,thisArg)}function baseCreateWrapper(bindData){var func=bindData[0],bitmask=bindData[1],partialArgs=bindData[2],partialRightArgs=bindData[3],thisArg=bindData[4],arity=bindData[5];var isBind=bitmask&1,isBindKey=bitmask&2,isCurry=bitmask&4,isCurryBound=bitmask&8,key=func;function bound(){var thisBinding=isBind?thisArg:this;if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(partialRightArgs||isCurry){args||(args=slice(arguments));if(partialRightArgs){push.apply(args,partialRightArgs)}if(isCurry&&args.length<arity){bitmask|=16&~32;return baseCreateWrapper([func,isCurryBound?bitmask:bitmask&~3,args,null,thisArg,arity])}}args||(args=arguments);if(isBindKey){func=thisBinding[key]}if(this instanceof bound){thisBinding=baseCreate(func.prototype);var result=func.apply(thisBinding,args);return isObject(result)?result:thisBinding}return func.apply(thisBinding,args)}setBindData(bound,bindData);return bound}function baseDifference(array,values){var index=-1,indexOf=getIndexOf(),length=array?array.length:0,isLarge=length>=largeArraySize&&indexOf===baseIndexOf,result=[];if(isLarge){var cache=createCache(values);if(cache){indexOf=cacheIndexOf;values=cache}else{isLarge=false}}while(++index<length){var value=array[index];if(indexOf(values,value)<0){result.push(value)}}if(isLarge){releaseObject(values)}return result}function baseFlatten(array,isShallow,isStrict,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0,result=[];while(++index<length){var value=array[index];if(value&&typeof value=="object"&&typeof value.length=="number"&&(isArray(value)||isArguments(value))){if(!isShallow){value=baseFlatten(value,isShallow,isStrict)}var valIndex=-1,valLength=value.length,resIndex=result.length;result.length+=valLength;while(++valIndex<valLength){result[resIndex++]=value[valIndex]}}else if(!isStrict){result.push(value)}}return result}function baseIsEqual(a,b,callback,isWhere,stackA,stackB){if(callback){var result=callback(a,b);if(typeof result!="undefined"){return!!result}}if(a===b){return a!==0||1/a==1/b}var type=typeof a,otherType=typeof b;if(a===a&&!(a&&objectTypes[type])&&!(b&&objectTypes[otherType])){return false}if(a==null||b==null){return a===b}var className=toString.call(a),otherClass=toString.call(b);if(className==argsClass){className=objectClass}if(otherClass==argsClass){otherClass=objectClass}if(className!=otherClass){return false}switch(className){case boolClass:case dateClass:return+a==+b;case numberClass:return a!=+a?b!=+b:a==0?1/a==1/b:a==+b;case regexpClass:case stringClass:return a==String(b)}var isArr=className==arrayClass;if(!isArr){var aWrapped=hasOwnProperty.call(a,"__wrapped__"),bWrapped=hasOwnProperty.call(b,"__wrapped__");if(aWrapped||bWrapped){return baseIsEqual(aWrapped?a.__wrapped__:a,bWrapped?b.__wrapped__:b,callback,isWhere,stackA,stackB)}if(className!=objectClass){return false}var ctorA=a.constructor,ctorB=b.constructor;if(ctorA!=ctorB&&!(isFunction(ctorA)&&ctorA 
instanceof ctorA&&isFunction(ctorB)&&ctorB instanceof ctorB)&&("constructor"in a&&"constructor"in b)){return false}}var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==a){return stackB[length]==b}}var size=0;result=true;stackA.push(a);stackB.push(b);if(isArr){length=a.length;size=b.length;result=size==length;if(result||isWhere){while(size--){var index=length,value=b[size];if(isWhere){while(index--){if(result=baseIsEqual(a[index],value,callback,isWhere,stackA,stackB)){break}}}else if(!(result=baseIsEqual(a[size],value,callback,isWhere,stackA,stackB))){break}}}}else{forIn(b,function(value,key,b){if(hasOwnProperty.call(b,key)){size++;return result=hasOwnProperty.call(a,key)&&baseIsEqual(a[key],value,callback,isWhere,stackA,stackB)}});if(result&&!isWhere){forIn(a,function(value,key,a){if(hasOwnProperty.call(a,key)){return result=--size>-1}})}}stackA.pop();stackB.pop();if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseMerge(object,source,callback,stackA,stackB){(isArray(source)?forEach:forOwn)(source,function(source,key){var found,isArr,result=source,value=object[key];if(source&&((isArr=isArray(source))||isPlainObject(source))){var stackLength=stackA.length;while(stackLength--){if(found=stackA[stackLength]==source){value=stackB[stackLength];break}}if(!found){var isShallow;if(callback){result=callback(value,source);if(isShallow=typeof result!="undefined"){value=result}}if(!isShallow){value=isArr?isArray(value)?value:[]:isPlainObject(value)?value:{}}stackA.push(source);stackB.push(value);if(!isShallow){baseMerge(value,source,callback,stackA,stackB)}}}else{if(callback){result=callback(value,source);if(typeof result=="undefined"){result=source}}if(typeof result!="undefined"){value=result}}object[key]=value})}function baseRandom(min,max){return min+floor(nativeRandom()*(max-min+1))}function baseUniq(array,isSorted,callback){var index=-1,indexOf=getIndexOf(),length=array?array.length:0,result=[];var isLarge=!isSorted&&length>=largeArraySize&&indexOf===baseIndexOf,seen=callback||isLarge?getArray():result;if(isLarge){var cache=createCache(seen);indexOf=cacheIndexOf;seen=cache}while(++index<length){var value=array[index],computed=callback?callback(value,index,array):value;if(isSorted?!index||seen[seen.length-1]!==computed:indexOf(seen,computed)<0){if(callback||isLarge){seen.push(computed)}result.push(value)}}if(isLarge){releaseArray(seen.array);releaseObject(seen)}else if(callback){releaseArray(seen)}return result}function createAggregator(setter){return function(collection,callback,thisArg){var result={};callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){var value=collection[index];setter(result,value,callback(value,index,collection),collection)}}else{forOwn(collection,function(value,key,collection){setter(result,value,callback(value,key,collection),collection)})}return result}}function createWrapper(func,bitmask,partialArgs,partialRightArgs,thisArg,arity){var isBind=bitmask&1,isBindKey=bitmask&2,isCurry=bitmask&4,isCurryBound=bitmask&8,isPartial=bitmask&16,isPartialRight=bitmask&32;if(!isBindKey&&!isFunction(func)){throw new TypeError}if(isPartial&&!partialArgs.length){bitmask&=~16;isPartial=partialArgs=false}if(isPartialRight&&!partialRightArgs.length){bitmask&=~32;isPartialRight=partialRightArgs=false}var 
bindData=func&&func.__bindData__;if(bindData&&bindData!==true){bindData=slice(bindData);if(bindData[2]){bindData[2]=slice(bindData[2])}if(bindData[3]){bindData[3]=slice(bindData[3])}if(isBind&&!(bindData[1]&1)){bindData[4]=thisArg}if(!isBind&&bindData[1]&1){bitmask|=8}if(isCurry&&!(bindData[1]&4)){bindData[5]=arity}if(isPartial){push.apply(bindData[2]||(bindData[2]=[]),partialArgs)}if(isPartialRight){unshift.apply(bindData[3]||(bindData[3]=[]),partialRightArgs)}bindData[1]|=bitmask;return createWrapper.apply(null,bindData)}var creater=bitmask==1||bitmask===17?baseBind:baseCreateWrapper;return creater([func,bitmask,partialArgs,partialRightArgs,thisArg,arity])}function escapeHtmlChar(match){return htmlEscapes[match]}function getIndexOf(){var result=(result=lodash.indexOf)===indexOf?baseIndexOf:result;return result}function isNative(value){return typeof value=="function"&&reNative.test(value)}var setBindData=!defineProperty?noop:function(func,value){descriptor.value=value;defineProperty(func,"__bindData__",descriptor)};function shimIsPlainObject(value){var ctor,result;if(!(value&&toString.call(value)==objectClass)||(ctor=value.constructor,isFunction(ctor)&&!(ctor instanceof ctor))){return false}forIn(value,function(value,key){result=key});return typeof result=="undefined"||hasOwnProperty.call(value,result)}function unescapeHtmlChar(match){return htmlUnescapes[match]}function isArguments(value){return value&&typeof value=="object"&&typeof value.length=="number"&&toString.call(value)==argsClass||false}var isArray=nativeIsArray||function(value){return value&&typeof value=="object"&&typeof value.length=="number"&&toString.call(value)==arrayClass||false};var shimKeys=function(object){var index,iterable=object,result=[];if(!iterable)return result;if(!objectTypes[typeof object])return result;for(index in iterable){if(hasOwnProperty.call(iterable,index)){result.push(index)}}return result};var keys=!nativeKeys?shimKeys:function(object){if(!isObject(object)){return[]}return nativeKeys(object)};var htmlEscapes={"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"};var htmlUnescapes=invert(htmlEscapes);var reEscapedHtml=RegExp("("+keys(htmlUnescapes).join("|")+")","g"),reUnescapedHtml=RegExp("["+keys(htmlEscapes).join("")+"]","g");var assign=function(object,source,guard){var index,iterable=object,result=iterable;if(!iterable)return result;var args=arguments,argsIndex=0,argsLength=typeof guard=="number"?2:args.length;if(argsLength>3&&typeof args[argsLength-2]=="function"){var callback=baseCreateCallback(args[--argsLength-1],args[argsLength--],2)}else if(argsLength>2&&typeof args[argsLength-1]=="function"){callback=args[--argsLength]}while(++argsIndex<argsLength){iterable=args[argsIndex];if(iterable&&objectTypes[typeof iterable]){var ownIndex=-1,ownProps=objectTypes[typeof iterable]&&keys(iterable),length=ownProps?ownProps.length:0;while(++ownIndex<length){index=ownProps[ownIndex];result[index]=callback?callback(result[index],iterable[index]):iterable[index]}}}return result};function clone(value,isDeep,callback,thisArg){if(typeof isDeep!="boolean"&&isDeep!=null){thisArg=callback;callback=isDeep;isDeep=false}return baseClone(value,isDeep,typeof callback=="function"&&baseCreateCallback(callback,thisArg,1))}function cloneDeep(value,callback,thisArg){return baseClone(value,true,typeof callback=="function"&&baseCreateCallback(callback,thisArg,1))}function create(prototype,properties){var result=baseCreate(prototype);return properties?assign(result,properties):result}var defaults=function(object,source,guard){var
index,iterable=object,result=iterable;if(!iterable)return result;var args=arguments,argsIndex=0,argsLength=typeof guard=="number"?2:args.length;while(++argsIndex<argsLength){iterable=args[argsIndex];if(iterable&&objectTypes[typeof iterable]){var ownIndex=-1,ownProps=objectTypes[typeof iterable]&&keys(iterable),length=ownProps?ownProps.length:0;while(++ownIndex<length){index=ownProps[ownIndex];if(typeof result[index]=="undefined")result[index]=iterable[index]}}}return result};function findKey(object,callback,thisArg){var result;callback=lodash.createCallback(callback,thisArg,3);forOwn(object,function(value,key,object){if(callback(value,key,object)){result=key;return false}});return result}function findLastKey(object,callback,thisArg){var result;callback=lodash.createCallback(callback,thisArg,3);forOwnRight(object,function(value,key,object){if(callback(value,key,object)){result=key;return false}});return result}var forIn=function(collection,callback,thisArg){var index,iterable=collection,result=iterable;if(!iterable)return result;if(!objectTypes[typeof iterable])return result;callback=callback&&typeof thisArg=="undefined"?callback:baseCreateCallback(callback,thisArg,3);for(index in iterable){if(callback(iterable[index],index,collection)===false)return result}return result};function forInRight(object,callback,thisArg){var pairs=[];forIn(object,function(value,key){pairs.push(key,value)});var length=pairs.length;callback=baseCreateCallback(callback,thisArg,3);while(length--){if(callback(pairs[length--],pairs[length],object)===false){break}}return object}var forOwn=function(collection,callback,thisArg){var index,iterable=collection,result=iterable;if(!iterable)return result;if(!objectTypes[typeof iterable])return result;callback=callback&&typeof thisArg=="undefined"?callback:baseCreateCallback(callback,thisArg,3);var ownIndex=-1,ownProps=objectTypes[typeof iterable]&&keys(iterable),length=ownProps?ownProps.length:0;while(++ownIndex<length){index=ownProps[ownIndex];if(callback(iterable[index],index,collection)===false)return result}return result};function forOwnRight(object,callback,thisArg){var props=keys(object),length=props.length;callback=baseCreateCallback(callback,thisArg,3);while(length--){var key=props[length];if(callback(object[key],key,object)===false){break}}return object}function functions(object){var result=[];forIn(object,function(value,key){if(isFunction(value)){result.push(key)}});return result.sort()}function has(object,key){return object?hasOwnProperty.call(object,key):false}function invert(object){var index=-1,props=keys(object),length=props.length,result={};while(++index<length){var key=props[index];result[object[key]]=key}return result}function isBoolean(value){return value===true||value===false||value&&typeof value=="object"&&toString.call(value)==boolClass||false}function isDate(value){return value&&typeof value=="object"&&toString.call(value)==dateClass||false}function isElement(value){return value&&value.nodeType===1||false}function isEmpty(value){var result=true;if(!value){return result}var className=toString.call(value),length=value.length;if(className==arrayClass||className==stringClass||className==argsClass||className==objectClass&&typeof length=="number"&&isFunction(value.splice)){return!length}forOwn(value,function(){return result=false});return result}function isEqual(a,b,callback,thisArg){return baseIsEqual(a,b,typeof callback=="function"&&baseCreateCallback(callback,thisArg,2))}function isFinite(value){return 
nativeIsFinite(value)&&!nativeIsNaN(parseFloat(value))}function isFunction(value){return typeof value=="function"}function isObject(value){return!!(value&&objectTypes[typeof value])}function isNaN(value){return isNumber(value)&&value!=+value}function isNull(value){return value===null}function isNumber(value){return typeof value=="number"||value&&typeof value=="object"&&toString.call(value)==numberClass||false}var isPlainObject=!getPrototypeOf?shimIsPlainObject:function(value){if(!(value&&toString.call(value)==objectClass)){return false}var valueOf=value.valueOf,objProto=isNative(valueOf)&&(objProto=getPrototypeOf(valueOf))&&getPrototypeOf(objProto);return objProto?value==objProto||getPrototypeOf(value)==objProto:shimIsPlainObject(value)};function isRegExp(value){return value&&typeof value=="object"&&toString.call(value)==regexpClass||false}function isString(value){return typeof value=="string"||value&&typeof value=="object"&&toString.call(value)==stringClass||false}function isUndefined(value){return typeof value=="undefined"}function mapValues(object,callback,thisArg){var result={};callback=lodash.createCallback(callback,thisArg,3);forOwn(object,function(value,key,object){result[key]=callback(value,key,object)});return result}function merge(object){var args=arguments,length=2;if(!isObject(object)){return object}if(typeof args[2]!="number"){length=args.length}if(length>3&&typeof args[length-2]=="function"){var callback=baseCreateCallback(args[--length-1],args[length--],2)}else if(length>2&&typeof args[length-1]=="function"){callback=args[--length]}var sources=slice(arguments,1,length),index=-1,stackA=getArray(),stackB=getArray();while(++index<length){baseMerge(object,sources[index],callback,stackA,stackB)}releaseArray(stackA);releaseArray(stackB);return object}function omit(object,callback,thisArg){var result={};if(typeof callback!="function"){var props=[];forIn(object,function(value,key){props.push(key)});props=baseDifference(props,baseFlatten(arguments,true,false,1));var index=-1,length=props.length;while(++index<length){var key=props[index];result[key]=object[key]}}else{callback=lodash.createCallback(callback,thisArg,3);forIn(object,function(value,key,object){if(!callback(value,key,object)){result[key]=value}})}return result}function pairs(object){var index=-1,props=keys(object),length=props.length,result=Array(length);while(++index<length){var key=props[index];result[index]=[key,object[key]]}return result}function pick(object,callback,thisArg){var result={};if(typeof callback!="function"){var index=-1,props=baseFlatten(arguments,true,false,1),length=isObject(object)?props.length:0;while(++index<length){var key=props[index];if(key in object){result[key]=object[key]}}}else{callback=lodash.createCallback(callback,thisArg,3);forIn(object,function(value,key,object){if(callback(value,key,object)){result[key]=value}})}return result}function transform(object,callback,accumulator,thisArg){var isArr=isArray(object);if(accumulator==null){if(isArr){accumulator=[]}else{var ctor=object&&object.constructor,proto=ctor&&ctor.prototype;accumulator=baseCreate(proto)}}if(callback){callback=lodash.createCallback(callback,thisArg,4);(isArr?forEach:forOwn)(object,function(value,index,object){return callback(accumulator,value,index,object)})}return accumulator}function values(object){var index=-1,props=keys(object),length=props.length,result=Array(length);while(++index<length){result[index]=object[props[index]]}return result}function at(collection){var 
args=arguments,index=-1,props=baseFlatten(args,true,false,1),length=args[2]&&args[2][args[1]]===collection?1:props.length,result=Array(length);while(++index<length){result[index]=collection[props[index]]}return result}function contains(collection,target,fromIndex){var index=-1,indexOf=getIndexOf(),length=collection?collection.length:0,result=false;fromIndex=(fromIndex<0?nativeMax(0,length+fromIndex):fromIndex)||0;if(isArray(collection)){result=indexOf(collection,target,fromIndex)>-1}else if(typeof length=="number"){result=(isString(collection)?collection.indexOf(target,fromIndex):indexOf(collection,target,fromIndex))>-1}else{forOwn(collection,function(value){if(++index>=fromIndex){return!(result=value===target)}})}return result}var countBy=createAggregator(function(result,value,key){hasOwnProperty.call(result,key)?result[key]++:result[key]=1});function every(collection,callback,thisArg){var result=true;callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){if(!(result=!!callback(collection[index],index,collection))){break}}}else{forOwn(collection,function(value,index,collection){return result=!!callback(value,index,collection)})}return result}function filter(collection,callback,thisArg){var result=[];callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){var value=collection[index];if(callback(value,index,collection)){result.push(value)}}}else{forOwn(collection,function(value,index,collection){if(callback(value,index,collection)){result.push(value)}})}return result}function find(collection,callback,thisArg){callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){var value=collection[index];if(callback(value,index,collection)){return value}}}else{var result;forOwn(collection,function(value,index,collection){if(callback(value,index,collection)){result=value;return false}});return result}}function findLast(collection,callback,thisArg){var result;callback=lodash.createCallback(callback,thisArg,3);forEachRight(collection,function(value,index,collection){if(callback(value,index,collection)){result=value;return false}});return result}function forEach(collection,callback,thisArg){var index=-1,length=collection?collection.length:0;callback=callback&&typeof thisArg=="undefined"?callback:baseCreateCallback(callback,thisArg,3);if(typeof length=="number"){while(++index<length){if(callback(collection[index],index,collection)===false){break}}}else{forOwn(collection,callback)}return collection}function forEachRight(collection,callback,thisArg){var length=collection?collection.length:0;callback=callback&&typeof thisArg=="undefined"?callback:baseCreateCallback(callback,thisArg,3);if(typeof length=="number"){while(length--){if(callback(collection[length],length,collection)===false){break}}}else{var props=keys(collection);length=props.length;forOwn(collection,function(value,key,collection){key=props?props[--length]:--length;return callback(collection[key],key,collection)})}return collection}var groupBy=createAggregator(function(result,value,key){(hasOwnProperty.call(result,key)?result[key]:result[key]=[]).push(value)});var indexBy=createAggregator(function(result,value,key){result[key]=value});function invoke(collection,methodName){var args=slice(arguments,2),index=-1,isFunc=typeof 
methodName=="function",length=collection?collection.length:0,result=Array(typeof length=="number"?length:0);forEach(collection,function(value){result[++index]=(isFunc?methodName:value[methodName]).apply(value,args)});return result}function map(collection,callback,thisArg){var index=-1,length=collection?collection.length:0;callback=lodash.createCallback(callback,thisArg,3);if(typeof length=="number"){var result=Array(length);while(++index<length){result[index]=callback(collection[index],index,collection)}}else{result=[];forOwn(collection,function(value,key,collection){result[++index]=callback(value,key,collection)})}return result}function max(collection,callback,thisArg){var computed=-Infinity,result=computed;if(typeof callback!="function"&&thisArg&&thisArg[callback]===collection){callback=null}if(callback==null&&isArray(collection)){var index=-1,length=collection.length;while(++index<length){var value=collection[index];if(value>result){result=value}}}else{callback=callback==null&&isString(collection)?charAtCallback:lodash.createCallback(callback,thisArg,3);forEach(collection,function(value,index,collection){var current=callback(value,index,collection);if(current>computed){computed=current;result=value}})}return result}function min(collection,callback,thisArg){var computed=Infinity,result=computed;if(typeof callback!="function"&&thisArg&&thisArg[callback]===collection){callback=null}if(callback==null&&isArray(collection)){var index=-1,length=collection.length;while(++index<length){var value=collection[index];if(value<result){result=value}}}else{callback=callback==null&&isString(collection)?charAtCallback:lodash.createCallback(callback,thisArg,3);forEach(collection,function(value,index,collection){var current=callback(value,index,collection);if(current<computed){computed=current;result=value}})}return result}var pluck=map;function reduce(collection,callback,accumulator,thisArg){if(!collection)return accumulator;var noaccum=arguments.length<3;callback=lodash.createCallback(callback,thisArg,4);var index=-1,length=collection.length;if(typeof length=="number"){if(noaccum){accumulator=collection[++index]}while(++index<length){accumulator=callback(accumulator,collection[index],index,collection)}}else{forOwn(collection,function(value,index,collection){accumulator=noaccum?(noaccum=false,value):callback(accumulator,value,index,collection)})}return accumulator}function reduceRight(collection,callback,accumulator,thisArg){var noaccum=arguments.length<3;callback=lodash.createCallback(callback,thisArg,4);forEachRight(collection,function(value,index,collection){accumulator=noaccum?(noaccum=false,value):callback(accumulator,value,index,collection)});return accumulator}function reject(collection,callback,thisArg){callback=lodash.createCallback(callback,thisArg,3);return filter(collection,function(value,index,collection){return!callback(value,index,collection)})}function sample(collection,n,guard){if(collection&&typeof collection.length!="number"){collection=values(collection)}if(n==null||guard){return collection?collection[baseRandom(0,collection.length-1)]:undefined}var result=shuffle(collection);result.length=nativeMin(nativeMax(0,n),result.length);return result}function shuffle(collection){var index=-1,length=collection?collection.length:0,result=Array(typeof length=="number"?length:0);forEach(collection,function(value){var rand=baseRandom(0,++index);result[index]=result[rand];result[rand]=value});return result}function size(collection){var length=collection?collection.length:0;return typeof 
length=="number"?length:keys(collection).length}function some(collection,callback,thisArg){var result;callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++index<length){if(result=callback(collection[index],index,collection)){break}}}else{forOwn(collection,function(value,index,collection){return!(result=callback(value,index,collection))})}return!!result}function sortBy(collection,callback,thisArg){var index=-1,isArr=isArray(callback),length=collection?collection.length:0,result=Array(typeof length=="number"?length:0);if(!isArr){callback=lodash.createCallback(callback,thisArg,3)}forEach(collection,function(value,key,collection){var object=result[++index]=getObject();if(isArr){object.criteria=map(callback,function(key){return value[key]})}else{(object.criteria=getArray())[0]=callback(value,key,collection)}object.index=index;object.value=value});length=result.length;result.sort(compareAscending);while(length--){var object=result[length];result[length]=object.value;if(!isArr){releaseArray(object.criteria)}releaseObject(object)}return result}function toArray(collection){if(collection&&typeof collection.length=="number"){return slice(collection)}return values(collection)}var where=filter;function compact(array){var index=-1,length=array?array.length:0,result=[];while(++index<length){var value=array[index];if(value){result.push(value)}}return result}function difference(array){return baseDifference(array,baseFlatten(arguments,true,true,1))}function findIndex(array,callback,thisArg){var index=-1,length=array?array.length:0;callback=lodash.createCallback(callback,thisArg,3);while(++index<length){if(callback(array[index],index,array)){return index}}return-1}function findLastIndex(array,callback,thisArg){var length=array?array.length:0;callback=lodash.createCallback(callback,thisArg,3);while(length--){if(callback(array[length],length,array)){return length}}return-1}function first(array,callback,thisArg){var n=0,length=array?array.length:0;if(typeof callback!="number"&&callback!=null){var index=-1;callback=lodash.createCallback(callback,thisArg,3);while(++index<length&&callback(array[index],index,array)){n++}}else{n=callback;if(n==null||thisArg){return array?array[0]:undefined}}return slice(array,0,nativeMin(nativeMax(0,n),length))}function flatten(array,isShallow,callback,thisArg){if(typeof isShallow!="boolean"&&isShallow!=null){thisArg=callback;callback=typeof isShallow!="function"&&thisArg&&thisArg[isShallow]===array?null:isShallow;isShallow=false}if(callback!=null){array=map(array,callback,thisArg)}return baseFlatten(array,isShallow)}function indexOf(array,value,fromIndex){if(typeof fromIndex=="number"){var length=array?array.length:0;fromIndex=fromIndex<0?nativeMax(0,length+fromIndex):fromIndex||0}else if(fromIndex){var index=sortedIndex(array,value);return array[index]===value?index:-1}return baseIndexOf(array,value,fromIndex)}function initial(array,callback,thisArg){var n=0,length=array?array.length:0;if(typeof callback!="number"&&callback!=null){var index=length;callback=lodash.createCallback(callback,thisArg,3);while(index--&&callback(array[index],index,array)){n++}}else{n=callback==null||thisArg?1:callback||n}return slice(array,0,nativeMin(nativeMax(0,length-n),length))}function intersection(){var args=[],argsIndex=-1,argsLength=arguments.length,caches=getArray(),indexOf=getIndexOf(),trustIndexOf=indexOf===baseIndexOf,seen=getArray();while(++argsIndex<argsLength){var 
value=arguments[argsIndex];if(isArray(value)||isArguments(value)){args.push(value);caches.push(trustIndexOf&&value.length>=largeArraySize&&createCache(argsIndex?args[argsIndex]:seen))}}var array=args[0],index=-1,length=array?array.length:0,result=[];outer:while(++index<length){var cache=caches[0];value=array[index];if((cache?cacheIndexOf(cache,value):indexOf(seen,value))<0){argsIndex=argsLength;(cache||seen).push(value);while(--argsIndex){cache=caches[argsIndex];if((cache?cacheIndexOf(cache,value):indexOf(args[argsIndex],value))<0){continue outer}}result.push(value)}}while(argsLength--){cache=caches[argsLength];if(cache){releaseObject(cache)}}releaseArray(caches);releaseArray(seen);return result}function last(array,callback,thisArg){var n=0,length=array?array.length:0;if(typeof callback!="number"&&callback!=null){var index=length;callback=lodash.createCallback(callback,thisArg,3);while(index--&&callback(array[index],index,array)){n++}}else{n=callback;if(n==null||thisArg){return array?array[length-1]:undefined}}return slice(array,nativeMax(0,length-n))}function lastIndexOf(array,value,fromIndex){var index=array?array.length:0;if(typeof fromIndex=="number"){index=(fromIndex<0?nativeMax(0,index+fromIndex):nativeMin(fromIndex,index-1))+1}while(index--){if(array[index]===value){return index}}return-1}function pull(array){var args=arguments,argsIndex=0,argsLength=args.length,length=array?array.length:0;while(++argsIndex<argsLength){var index=-1,value=args[argsIndex];while(++index<length){if(array[index]===value){splice.call(array,index--,1);length--}}}return array}function range(start,end,step){start=+start||0;step=typeof step=="number"?step:+step||1;if(end==null){end=start;start=0}var index=-1,length=nativeMax(0,ceil((end-start)/(step||1))),result=Array(length);while(++index<length){result[index]=start;start+=step}return result}function remove(array,callback,thisArg){var index=-1,length=array?array.length:0,result=[];callback=lodash.createCallback(callback,thisArg,3);while(++index<length){var value=array[index];if(callback(value,index,array)){result.push(value);splice.call(array,index--,1);length--}}return result}function rest(array,callback,thisArg){if(typeof callback!="number"&&callback!=null){var n=0,index=-1,length=array?array.length:0;callback=lodash.createCallback(callback,thisArg,3);while(++index<length&&callback(array[index],index,array)){n++}}else{n=callback==null||thisArg?1:nativeMax(0,callback)}return slice(array,n)}function sortedIndex(array,value,callback,thisArg){var low=0,high=array?array.length:low;callback=callback?lodash.createCallback(callback,thisArg,1):identity;value=callback(value);while(low<high){var mid=low+high>>>1;callback(array[mid])<value?low=mid+1:high=mid}return low}function union(){return baseUniq(baseFlatten(arguments,true,true))}function uniq(array,isSorted,callback,thisArg){if(typeof isSorted!="boolean"&&isSorted!=null){thisArg=callback;callback=typeof isSorted!="function"&&thisArg&&thisArg[isSorted]===array?null:isSorted;isSorted=false}if(callback!=null){callback=lodash.createCallback(callback,thisArg,3)}return baseUniq(array,isSorted,callback)}function without(array){return baseDifference(array,slice(arguments,1))}function xor(){var index=-1,length=arguments.length;while(++index<length){var array=arguments[index];if(isArray(array)||isArguments(array)){ +var result=result?baseUniq(baseDifference(result,array).concat(baseDifference(array,result))):array}}return result||[]}function zip(){var 
array=arguments.length>1?arguments:arguments[0],index=-1,length=array?max(pluck(array,"length")):0,result=Array(length<0?0:length);while(++index<length){result[index]=pluck(array,index)}return result}function zipObject(keys,values){var index=-1,length=keys?keys.length:0,result={};if(!values&&length&&!isArray(keys[0])){values=[]}while(++index<length){var key=keys[index];if(values){result[key]=values[index]}else if(key){result[key[0]]=key[1]}}return result}function after(n,func){if(!isFunction(func)){throw new TypeError}return function(){if(--n<1){return func.apply(this,arguments)}}}function bind(func,thisArg){return arguments.length>2?createWrapper(func,17,slice(arguments,2),null,thisArg):createWrapper(func,1,null,null,thisArg)}function bindAll(object){var funcs=arguments.length>1?baseFlatten(arguments,true,false,1):functions(object),index=-1,length=funcs.length;while(++index<length){var key=funcs[index];object[key]=createWrapper(object[key],1,null,null,object)}return object}function bindKey(object,key){return arguments.length>2?createWrapper(key,19,slice(arguments,2),null,object):createWrapper(key,3,null,null,object)}function compose(){var funcs=arguments,length=funcs.length;while(length--){if(!isFunction(funcs[length])){throw new TypeError}}return function(){var args=arguments,length=funcs.length;while(length--){args=[funcs[length].apply(this,args)]}return args[0]}}function curry(func,arity){arity=typeof arity=="number"?arity:+arity||func.length;return createWrapper(func,4,null,null,null,arity)}function debounce(func,wait,options){var args,maxTimeoutId,result,stamp,thisArg,timeoutId,trailingCall,lastCalled=0,maxWait=false,trailing=true;if(!isFunction(func)){throw new TypeError}wait=nativeMax(0,wait)||0;if(options===true){var leading=true;trailing=false}else if(isObject(options)){leading=options.leading;maxWait="maxWait"in options&&(nativeMax(wait,options.maxWait)||0);trailing="trailing"in options?options.trailing:trailing}var delayed=function(){var remaining=wait-(now()-stamp);if(remaining<=0){if(maxTimeoutId){clearTimeout(maxTimeoutId)}var isCalled=trailingCall;maxTimeoutId=timeoutId=trailingCall=undefined;if(isCalled){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}}else{timeoutId=setTimeout(delayed,remaining)}};var maxDelayed=function(){if(timeoutId){clearTimeout(timeoutId)}maxTimeoutId=timeoutId=trailingCall=undefined;if(trailing||maxWait!==wait){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}};return function(){args=arguments;stamp=now();thisArg=this;trailingCall=trailing&&(timeoutId||!leading);if(maxWait===false){var leadingCall=leading&&!timeoutId}else{if(!maxTimeoutId&&!leading){lastCalled=stamp}var remaining=maxWait-(stamp-lastCalled),isCalled=remaining<=0;if(isCalled){if(maxTimeoutId){maxTimeoutId=clearTimeout(maxTimeoutId)}lastCalled=stamp;result=func.apply(thisArg,args)}else if(!maxTimeoutId){maxTimeoutId=setTimeout(maxDelayed,remaining)}}if(isCalled&&timeoutId){timeoutId=clearTimeout(timeoutId)}else if(!timeoutId&&wait!==maxWait){timeoutId=setTimeout(delayed,wait)}if(leadingCall){isCalled=true;result=func.apply(thisArg,args)}if(isCalled&&!timeoutId&&!maxTimeoutId){args=thisArg=null}return result}}function defer(func){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,1);return setTimeout(function(){func.apply(undefined,args)},1)}function delay(func,wait){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,2);return 
setTimeout(function(){func.apply(undefined,args)},wait)}function memoize(func,resolver){if(!isFunction(func)){throw new TypeError}var memoized=function(){var cache=memoized.cache,key=resolver?resolver.apply(this,arguments):keyPrefix+arguments[0];return hasOwnProperty.call(cache,key)?cache[key]:cache[key]=func.apply(this,arguments)};memoized.cache={};return memoized}function once(func){var ran,result;if(!isFunction(func)){throw new TypeError}return function(){if(ran){return result}ran=true;result=func.apply(this,arguments);func=null;return result}}function partial(func){return createWrapper(func,16,slice(arguments,1))}function partialRight(func){return createWrapper(func,32,null,slice(arguments,1))}function throttle(func,wait,options){var leading=true,trailing=true;if(!isFunction(func)){throw new TypeError}if(options===false){leading=false}else if(isObject(options)){leading="leading"in options?options.leading:leading;trailing="trailing"in options?options.trailing:trailing}debounceOptions.leading=leading;debounceOptions.maxWait=wait;debounceOptions.trailing=trailing;return debounce(func,wait,debounceOptions)}function wrap(value,wrapper){return createWrapper(wrapper,16,[value])}function constant(value){return function(){return value}}function createCallback(func,thisArg,argCount){var type=typeof func;if(func==null||type=="function"){return baseCreateCallback(func,thisArg,argCount)}if(type!="object"){return property(func)}var props=keys(func),key=props[0],a=func[key];if(props.length==1&&a===a&&!isObject(a)){return function(object){var b=object[key];return a===b&&(a!==0||1/a==1/b)}}return function(object){var length=props.length,result=false;while(length--){if(!(result=baseIsEqual(object[props[length]],func[props[length]],null,true))){break}}return result}}function escape(string){return string==null?"":String(string).replace(reUnescapedHtml,escapeHtmlChar)}function identity(value){return value}function mixin(object,source,options){var chain=true,methodNames=source&&functions(source);if(!source||!options&&!methodNames.length){if(options==null){options=source}ctor=lodashWrapper;source=object;object=lodash;methodNames=functions(source)}if(options===false){chain=false}else if(isObject(options)&&"chain"in options){chain=options.chain}var ctor=object,isFunc=isFunction(ctor);forEach(methodNames,function(methodName){var func=object[methodName]=source[methodName];if(isFunc){ctor.prototype[methodName]=function(){var chainAll=this.__chain__,value=this.__wrapped__,args=[value];push.apply(args,arguments);var result=func.apply(object,args);if(chain||chainAll){if(value===result&&isObject(result)){return this}result=new ctor(result);result.__chain__=chainAll}return result}}})}function noConflict(){context._=oldDash;return this}function noop(){}var now=isNative(now=Date.now)&&now||function(){return(new Date).getTime()};var parseInt=nativeParseInt(whitespace+"08")==8?nativeParseInt:function(value,radix){return nativeParseInt(isString(value)?value.replace(reLeadingSpacesAndZeros,""):value,radix||0)};function property(key){return function(object){return object[key]}}function random(min,max,floating){var noMin=min==null,noMax=max==null;if(floating==null){if(typeof min=="boolean"&&noMax){floating=min;min=1}else if(!noMax&&typeof max=="boolean"){floating=max;noMax=true}}if(noMin&&noMax){max=1}min=+min||0;if(noMax){max=min;min=0}else{max=+max||0}if(floating||min%1||max%1){var rand=nativeRandom();return nativeMin(min+rand*(max-min+parseFloat("1e-"+((rand+"").length-1))),max)}return baseRandom(min,max)}function 
result(object,key){if(object){var value=object[key];return isFunction(value)?object[key]():value}}function template(text,data,options){var settings=lodash.templateSettings;text=String(text||"");options=defaults({},options,settings);var imports=defaults({},options.imports,settings.imports),importsKeys=keys(imports),importsValues=values(imports);var isEvaluating,index=0,interpolate=options.interpolate||reNoMatch,source="__p += '";var reDelimiters=RegExp((options.escape||reNoMatch).source+"|"+interpolate.source+"|"+(interpolate===reInterpolate?reEsTemplate:reNoMatch).source+"|"+(options.evaluate||reNoMatch).source+"|$","g");text.replace(reDelimiters,function(match,escapeValue,interpolateValue,esTemplateValue,evaluateValue,offset){interpolateValue||(interpolateValue=esTemplateValue);source+=text.slice(index,offset).replace(reUnescapedString,escapeStringChar);if(escapeValue){source+="' +\n__e("+escapeValue+") +\n'"}if(evaluateValue){isEvaluating=true;source+="';\n"+evaluateValue+";\n__p += '"}if(interpolateValue){source+="' +\n((__t = ("+interpolateValue+")) == null ? '' : __t) +\n'"}index=offset+match.length;return match});source+="';\n";var variable=options.variable,hasVariable=variable;if(!hasVariable){variable="obj";source="with ("+variable+") {\n"+source+"\n}\n"}source=(isEvaluating?source.replace(reEmptyStringLeading,""):source).replace(reEmptyStringMiddle,"$1").replace(reEmptyStringTrailing,"$1;");source="function("+variable+") {\n"+(hasVariable?"":variable+" || ("+variable+" = {});\n")+"var __t, __p = '', __e = _.escape"+(isEvaluating?", __j = Array.prototype.join;\n"+"function print() { __p += __j.call(arguments, '') }\n":";\n")+source+"return __p\n}";var sourceURL="\n/*\n//# sourceURL="+(options.sourceURL||"/lodash/template/source["+templateCounter++ +"]")+"\n*/";try{var result=Function(importsKeys,"return "+source+sourceURL).apply(undefined,importsValues)}catch(e){e.source=source;throw e}if(data){return result(data)}result.source=source;return result}function times(n,callback,thisArg){n=(n=+n)>-1?n:0;var index=-1,result=Array(n);callback=baseCreateCallback(callback,thisArg,1);while(++index<n){result[index]=callback(index)}return result}function unescape(string){return string==null?"":String(string).replace(reEscapedHtml,unescapeHtmlChar)}function uniqueId(prefix){var id=++idCounter;return String(prefix==null?"":prefix)+id}function chain(value){value=new lodashWrapper(value);value.__chain__=true;return value}function tap(value,interceptor){interceptor(value);return value}function wrapperChain(){this.__chain__=true;return this}function wrapperToString(){return String(this.__wrapped__)}function wrapperValueOf(){return 
this.__wrapped__}lodash.after=after;lodash.assign=assign;lodash.at=at;lodash.bind=bind;lodash.bindAll=bindAll;lodash.bindKey=bindKey;lodash.chain=chain;lodash.compact=compact;lodash.compose=compose;lodash.constant=constant;lodash.countBy=countBy;lodash.create=create;lodash.createCallback=createCallback;lodash.curry=curry;lodash.debounce=debounce;lodash.defaults=defaults;lodash.defer=defer;lodash.delay=delay;lodash.difference=difference;lodash.filter=filter;lodash.flatten=flatten;lodash.forEach=forEach;lodash.forEachRight=forEachRight;lodash.forIn=forIn;lodash.forInRight=forInRight;lodash.forOwn=forOwn;lodash.forOwnRight=forOwnRight;lodash.functions=functions;lodash.groupBy=groupBy;lodash.indexBy=indexBy;lodash.initial=initial;lodash.intersection=intersection;lodash.invert=invert;lodash.invoke=invoke;lodash.keys=keys;lodash.map=map;lodash.mapValues=mapValues;lodash.max=max;lodash.memoize=memoize;lodash.merge=merge;lodash.min=min;lodash.omit=omit;lodash.once=once;lodash.pairs=pairs;lodash.partial=partial;lodash.partialRight=partialRight;lodash.pick=pick;lodash.pluck=pluck;lodash.property=property;lodash.pull=pull;lodash.range=range;lodash.reject=reject;lodash.remove=remove;lodash.rest=rest;lodash.shuffle=shuffle;lodash.sortBy=sortBy;lodash.tap=tap;lodash.throttle=throttle;lodash.times=times;lodash.toArray=toArray;lodash.transform=transform;lodash.union=union;lodash.uniq=uniq;lodash.values=values;lodash.where=where;lodash.without=without;lodash.wrap=wrap;lodash.xor=xor;lodash.zip=zip;lodash.zipObject=zipObject;lodash.collect=map;lodash.drop=rest;lodash.each=forEach;lodash.eachRight=forEachRight;lodash.extend=assign;lodash.methods=functions;lodash.object=zipObject;lodash.select=filter;lodash.tail=rest;lodash.unique=uniq;lodash.unzip=zip;mixin(lodash);lodash.clone=clone;lodash.cloneDeep=cloneDeep;lodash.contains=contains;lodash.escape=escape;lodash.every=every;lodash.find=find;lodash.findIndex=findIndex;lodash.findKey=findKey;lodash.findLast=findLast;lodash.findLastIndex=findLastIndex;lodash.findLastKey=findLastKey;lodash.has=has;lodash.identity=identity;lodash.indexOf=indexOf;lodash.isArguments=isArguments;lodash.isArray=isArray;lodash.isBoolean=isBoolean;lodash.isDate=isDate;lodash.isElement=isElement;lodash.isEmpty=isEmpty;lodash.isEqual=isEqual;lodash.isFinite=isFinite;lodash.isFunction=isFunction;lodash.isNaN=isNaN;lodash.isNull=isNull;lodash.isNumber=isNumber;lodash.isObject=isObject;lodash.isPlainObject=isPlainObject;lodash.isRegExp=isRegExp;lodash.isString=isString;lodash.isUndefined=isUndefined;lodash.lastIndexOf=lastIndexOf;lodash.mixin=mixin;lodash.noConflict=noConflict;lodash.noop=noop;lodash.now=now;lodash.parseInt=parseInt;lodash.random=random;lodash.reduce=reduce;lodash.reduceRight=reduceRight;lodash.result=result;lodash.runInContext=runInContext;lodash.size=size;lodash.some=some;lodash.sortedIndex=sortedIndex;lodash.template=template;lodash.unescape=unescape;lodash.uniqueId=uniqueId;lodash.all=every;lodash.any=some;lodash.detect=find;lodash.findWhere=find;lodash.foldl=reduce;lodash.foldr=reduceRight;lodash.include=contains;lodash.inject=reduce;mixin(function(){var source={};forOwn(lodash,function(func,methodName){if(!lodash.prototype[methodName]){source[methodName]=func}});return source}(),false);lodash.first=first;lodash.last=last;lodash.sample=sample;lodash.take=first;lodash.head=first;forOwn(lodash,function(func,methodName){var callbackable=methodName!=="sample";if(!lodash.prototype[methodName]){lodash.prototype[methodName]=function(n,guard){var 
chainAll=this.__chain__,result=func(this.__wrapped__,n,guard);return!chainAll&&(n==null||guard&&!(callbackable&&typeof n=="function"))?result:new lodashWrapper(result,chainAll)}}});lodash.VERSION="2.4.1";lodash.prototype.chain=wrapperChain;lodash.prototype.toString=wrapperToString;lodash.prototype.value=wrapperValueOf;lodash.prototype.valueOf=wrapperValueOf;forEach(["join","pop","shift"],function(methodName){var func=arrayRef[methodName];lodash.prototype[methodName]=function(){var chainAll=this.__chain__,result=func.apply(this.__wrapped__,arguments);return chainAll?new lodashWrapper(result,chainAll):result}});forEach(["push","reverse","sort","unshift"],function(methodName){var func=arrayRef[methodName];lodash.prototype[methodName]=function(){func.apply(this.__wrapped__,arguments);return this}});forEach(["concat","slice","splice"],function(methodName){var func=arrayRef[methodName];lodash.prototype[methodName]=function(){return new lodashWrapper(func.apply(this.__wrapped__,arguments),this.__chain__)}});return lodash}var _=runInContext();if(typeof define=="function"&&typeof define.amd=="object"&&define.amd){root._=_;define(function(){return _})}else if(freeExports&&freeModule){if(moduleExports){(freeModule.exports=_)._=_}else{freeExports._=_}}else{root._=_}}).call(this)}).call(this,typeof global!=="undefined"?global:typeof self!=="undefined"?self:typeof window!=="undefined"?window:{})},{}]},{},[1])(1)}); diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index 764dd2cfcd76f..8b7b1f60501e4 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -140,8 +140,6 @@ function renderDagViz(forJob) { svg.selectAll("#" + nodeId).classed("cached", true); }); - // More post-processing - drawClusterLabels(svg, forJob); resizeSvg(svg); } @@ -151,7 +149,7 @@ function renderDagVizForStage(svgContainer) { var dot = metadata.select(".dot-file").text(); var containerId = VizConstants.graphPrefix + metadata.attr("stage-id"); var container = svgContainer.append("g").attr("id", containerId); - renderDot(dot, container, StagePageVizConstants.rankSep); + renderDot(dot, container, false); // Round corners on rectangles svgContainer @@ -209,7 +207,7 @@ function renderDagVizForJob(svgContainer) { } // Actually render the stage - renderDot(dot, container, JobPageVizConstants.rankSep); + renderDot(dot, container, true); // Round corners on rectangles container @@ -231,14 +229,14 @@ function renderDagVizForJob(svgContainer) { } /* Render the dot file as an SVG in the given container. */ -function renderDot(dot, container, rankSep) { +function renderDot(dot, container, forJob) { var escaped_dot = dot .replace(/</g, "<") .replace(/>/g, ">") .replace(/"/g, "\""); var g = graphlibDot.read(escaped_dot); - g.graph().rankSep = rankSep; var renderer = new dagreD3.render(); + preprocessGraphLayout(g, forJob); renderer(container, g); } @@ -251,50 +249,37 @@ function graphContainer() { return d3.select("#dag-viz-graph"); } function metadataContainer() { return d3.select("#dag-viz-metadata"); } /* - * Helper function to create draw a label for each cluster. - * - * We need to do this manually because dagre-d3 does not support labeling clusters. - * In general, the clustering support for dagre-d3 is quite limited at this point. + * Helper function to pre-process the graph layout. 
+ * This step is necessary for certain styles that affect the positioning + * and sizes of graph elements, e.g. padding, font style, shape. */ -function drawClusterLabels(svgContainer, forJob) { - var clusterLabelSize, stageClusterLabelSize; +function preprocessGraphLayout(g, forJob) { + var nodes = g.nodes(); + for (var i = 0; i < nodes.length; i++) { + var isCluster = g.children(nodes[i]).length > 0; + if (!isCluster) { + var node = g.node(nodes[i]); + if (forJob) { + // Do not display RDD name on job page + node.shape = "circle"; + node.labelStyle = "font-size: 0px"; + } else { + node.labelStyle = "font-size: 12px"; + } + node.padding = "5"; + } + } + // Curve the edges + var edges = g.edges(); + for (var j = 0; j < edges.length; j++) { + edges[j].lineInterpolate = "basis"; + } + // Adjust vertical separation between nodes if (forJob) { - clusterLabelSize = JobPageVizConstants.clusterLabelSize; - stageClusterLabelSize = JobPageVizConstants.stageClusterLabelSize; + g.graph().rankSep = JobPageVizConstants.rankSep; } else { - clusterLabelSize = StagePageVizConstants.clusterLabelSize; - stageClusterLabelSize = StagePageVizConstants.stageClusterLabelSize; + g.graph().rankSep = StagePageVizConstants.rankSep; } - svgContainer.selectAll("g.cluster").each(function() { - var cluster = d3.select(this); - var isStage = cluster.attr("id").indexOf(VizConstants.stageClusterPrefix) > -1; - var labelSize = isStage ? stageClusterLabelSize : clusterLabelSize; - drawClusterLabel(cluster, labelSize); - }); -} - -/* - * Helper function to draw a label for the given cluster element based on its name. - * - * In the process, we need to expand the bounding box to make room for the label. - * We need to do this because dagre-d3 did not take this into account when it first - * rendered the bounding boxes. Note that this means we need to adjust the view box - * of the SVG afterwards since we shifted a few boxes around. 
- */ -function drawClusterLabel(d3cluster, fontSize) { - var cluster = d3cluster; - var rect = d3cluster.select("rect"); - rect.attr("y", toFloat(rect.attr("y")) - fontSize); - rect.attr("height", toFloat(rect.attr("height")) + fontSize); - var labelX = toFloat(rect.attr("x")) + toFloat(rect.attr("width")) - fontSize / 2; - var labelY = toFloat(rect.attr("y")) + fontSize * 1.5; - var labelText = cluster.attr("name").replace(VizConstants.clusterPrefix, ""); - cluster.append("text") - .attr("x", labelX) - .attr("y", labelY) - .attr("text-anchor", "end") - .style("font-size", fontSize + "px") - .text(labelText); } /* @@ -444,7 +429,7 @@ function addTooltipsForRDDs(svgContainer) { if (tooltipText) { node.select("circle") .attr("data-toggle", "tooltip") - .attr("data-placement", "right") + .attr("data-placement", "bottom") .attr("title", tooltipText) } }); diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 6a0f5c5d16daa..441c97d6d3e2a 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -360,7 +360,7 @@ private[spark] object UIUtils extends Logging { { graphs.map { g => <div class="stage-metadata" stage-id={g.rootCluster.id} style="display:none"> - <div class="dot-file">{RDDOperationGraph.makeDotFile(g, forJob)}</div> + <div class="dot-file">{RDDOperationGraph.makeDotFile(g)}</div> { g.incomingEdges.map { e => <div class="incoming-edge">{e.fromId},{e.toId}</div> } } { g.outgoingEdges.map { e => <div class="outgoing-edge">{e.fromId},{e.toId}</div> } } { diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala index b470aaa676e78..25d5c6ff7e9cd 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala @@ -158,54 +158,32 @@ private[ui] object RDDOperationGraph extends Logging { * * For the complete DOT specification, see http://www.graphviz.org/Documentation/dotguide.pdf. */ - def makeDotFile(graph: RDDOperationGraph, forJob: Boolean): String = { + def makeDotFile(graph: RDDOperationGraph): String = { val dotFile = new StringBuilder dotFile.append("digraph G {\n") - dotFile.append(makeDotSubgraph(graph.rootCluster, forJob, indent = " ")) - graph.edges.foreach { edge => - dotFile.append(s""" ${edge.fromId}->${edge.toId} [lineInterpolate="basis"];\n""") - } + dotFile.append(makeDotSubgraph(graph.rootCluster, indent = " ")) + graph.edges.foreach { edge => dotFile.append(s""" ${edge.fromId}->${edge.toId};\n""") } dotFile.append("}") val result = dotFile.toString() logDebug(result) result } - /** - * Return the dot representation of a node in an RDDOperationGraph. - * - * On the job page, is displayed as a small circle without labels. - * On the stage page, it is displayed as a box with an embedded label. - */ - private def makeDotNode(node: RDDOperationNode, forJob: Boolean): String = { - val label = s"${node.name} (${node.id})" - if (forJob) { - s"""${node.id} [label="$label" shape="circle" padding="5" labelStyle="font-size: 0"]""" - } else { - s"""${node.id} [label="$label" padding="5" labelStyle="font-size: 12px"]""" - } + /** Return the dot representation of a node in an RDDOperationGraph. 
*/ + private def makeDotNode(node: RDDOperationNode): String = { + s"""${node.id} [label="${node.name} (${node.id})"]""" } /** Return the dot representation of a subgraph in an RDDOperationGraph. */ - private def makeDotSubgraph( - cluster: RDDOperationCluster, - forJob: Boolean, - indent: String): String = { + private def makeDotSubgraph(cluster: RDDOperationCluster, indent: String): String = { val subgraph = new StringBuilder - // TODO: move specific graph properties like these to spark-dag-viz.js - val paddingTop = if (forJob) 10 else 20 subgraph.append(indent + s"subgraph cluster${cluster.id} {\n") subgraph.append(indent + s""" label="${cluster.name}";\n""") - // If there are nested clusters, add some padding - // Do this for the stage page because we use bigger fonts there - if (cluster.childClusters.nonEmpty) { - subgraph.append(indent + s""" paddingTop="$paddingTop";\n""") - } cluster.childNodes.foreach { node => - subgraph.append(indent + s" ${makeDotNode(node, forJob)};\n") + subgraph.append(indent + s" ${makeDotNode(node)};\n") } cluster.childClusters.foreach { cscope => - subgraph.append(makeDotSubgraph(cscope, forJob, indent + " ")) + subgraph.append(makeDotSubgraph(cscope, indent + " ")) } subgraph.append(indent + "}\n") subgraph.toString() From 4e290522c2a6310636317c54589dc35c91d95486 Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Tue, 12 May 2015 11:51:55 -0700 Subject: [PATCH 112/320] [SPARK-7276] [DATAFRAME] speed up DataFrame.select by collapsing Project Author: Wenchen Fan <cloud0fan@outlook.com> Closes #5831 from cloud-fan/7276 and squashes the following commits: ee4a1e1 [Wenchen Fan] fix rebase mistake a3b565d [Wenchen Fan] refactor 99deb5d [Wenchen Fan] add test f1f67ad [Wenchen Fan] fix 7276 --- .../sql/catalyst/optimizer/Optimizer.scala | 40 +++++++++++-------- .../optimizer/FilterPushdownSuite.scala | 3 +- .../org/apache/spark/sql/DataFrame.scala | 4 +- .../org/apache/spark/sql/DataFrameSuite.scala | 12 ++++++ 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 1ee5fb245fbb2..b163707cc9925 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -43,6 +43,7 @@ object DefaultOptimizer extends Optimizer { PushPredicateThroughJoin, PushPredicateThroughGenerate, ColumnPruning, + ProjectCollapsing, CombineLimits) :: Batch("ConstantFolding", FixedPoint(100), NullPropagation, @@ -114,7 +115,7 @@ object UnionPushdown extends Rule[LogicalPlan] { * - Aggregate * - Project <- Join * - LeftSemiJoin - * - Collapse adjacent projections, performing alias substitution. + * - Performing alias substitution. */ object ColumnPruning extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { @@ -152,7 +153,28 @@ object ColumnPruning extends Rule[LogicalPlan] { Join(left, prunedChild(right, allReferences), LeftSemi, condition) - // Combine adjacent Projects. 
+ case Project(projectList, Limit(exp, child)) => + Limit(exp, Project(projectList, child)) + + // Eliminate no-op Projects + case Project(projectList, child) if child.output == projectList => child + } + + /** Applies a projection only when the child is producing unnecessary attributes */ + private def prunedChild(c: LogicalPlan, allReferences: AttributeSet) = + if ((c.outputSet -- allReferences.filter(c.outputSet.contains)).nonEmpty) { + Project(allReferences.filter(c.outputSet.contains).toSeq, c) + } else { + c + } +} + +/** + * Combines two adjacent [[Project]] operators into one, merging the + * expressions into one single expression. + */ +object ProjectCollapsing extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case Project(projectList1, Project(projectList2, child)) => // Create a map of Aliases to their values from the child projection. // e.g., 'SELECT ... FROM (SELECT a + b AS c, d ...)' produces Map(c -> Alias(a + b, c)). @@ -169,21 +191,7 @@ object ColumnPruning extends Rule[LogicalPlan] { }).asInstanceOf[Seq[NamedExpression]] Project(substitutedProjection, child) - - case Project(projectList, Limit(exp, child)) => - Limit(exp, Project(projectList, child)) - - // Eliminate no-op Projects - case Project(projectList, child) if child.output == projectList => child } - - /** Applies a projection only when the child is producing unnecessary attributes */ - private def prunedChild(c: LogicalPlan, allReferences: AttributeSet) = - if ((c.outputSet -- allReferences.filter(c.outputSet.contains)).nonEmpty) { - Project(allReferences.filter(c.outputSet.contains).toSeq, c) - } else { - c - } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 58d415d9011e1..0c428f7231b8e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -38,7 +38,8 @@ class FilterPushdownSuite extends PlanTest { PushPredicateThroughProject, PushPredicateThroughJoin, PushPredicateThroughGenerate, - ColumnPruning) :: Nil + ColumnPruning, + ProjectCollapsing) :: Nil } val testRelation = LocalRelation('a.int, 'b.int, 'c.int) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index f3107f7b51ad8..1f85dac682cbe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -567,7 +567,9 @@ class DataFrame private[sql]( case Column(expr: NamedExpression) => expr case Column(expr: Expression) => Alias(expr, expr.prettyString)() } - Project(namedExpressions.toSeq, logicalPlan) + // When user continuously call `select`, speed up analysis by collapsing `Project` + import org.apache.spark.sql.catalyst.optimizer.ProjectCollapsing + Project(namedExpressions.toSeq, ProjectCollapsing(logicalPlan)) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index d58438e5d129c..52aa1f6558f80 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -493,4 +493,16 @@ class DataFrameSuite extends QueryTest { 
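To make the effect of the eager ProjectCollapsing call above concrete, here is a minimal PySpark sketch (assuming an existing SQLContext named sqlContext; the plan shapes described in the comments are illustrative rather than verbatim output). The regression test added below asserts the same bound on nested Projects.

# Before this change, every select() stacked another Project onto the logical plan;
# with ProjectCollapsing applied eagerly in DataFrame.select, repeated selects are merged.
df = sqlContext.createDataFrame([(1, "a"), (2, "b")], ["id", "value"])
for _ in range(5):
    df = df.select("*")
# The analyzed plan should now contain at most two nested Projects
# instead of one Project per select() call.
df.explain(True)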
testData.dropDuplicates(Seq("value2")), Seq(Row(2, 1, 2), Row(1, 1, 1))) } + + test("SPARK-7276: Project collapse for continuous select") { + var df = testData + for (i <- 1 to 5) { + df = df.select($"*") + } + + import org.apache.spark.sql.catalyst.plans.logical.Project + // make sure df have at most two Projects + val p = df.logicalPlan.asInstanceOf[Project].child.asInstanceOf[Project] + assert(!p.child.isInstanceOf[Project]) + } } From b9b01f44f687f35460db9e0ab0b426897747596a Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Tue, 12 May 2015 12:06:30 -0700 Subject: [PATCH 113/320] [HOT FIX #6076] DAG visualization: curve the edges --- .../main/resources/org/apache/spark/ui/static/spark-dag-viz.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index 8b7b1f60501e4..f7d0d3c61457c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -272,7 +272,8 @@ function preprocessGraphLayout(g, forJob) { // Curve the edges var edges = g.edges(); for (var j = 0; j < edges.length; j++) { - edges[j].lineInterpolate = "basis"; + var edge = g.edge(edges[j]); + edge.lineInterpolate = "basis"; } // Adjust vertical separation between nodes if (forJob) { From 8e935b0a214f8b477fe9579fbf6a2d0a27b59118 Mon Sep 17 00:00:00 2001 From: Burak Yavuz <brkyvz@gmail.com> Date: Tue, 12 May 2015 12:17:05 -0700 Subject: [PATCH 114/320] [SPARK-7487] [ML] Feature Parity in PySpark for ml.regression Added LinearRegression Python API Author: Burak Yavuz <brkyvz@gmail.com> Closes #6016 from brkyvz/ml-reg and squashes the following commits: 11c9ef9 [Burak Yavuz] address comments 1027a40 [Burak Yavuz] fix typo 4c699ad [Burak Yavuz] added tree regressor api 8afead2 [Burak Yavuz] made mixin for DT fa51c74 [Burak Yavuz] save additions 0640d48 [Burak Yavuz] added ml.regression 82aac48 [Burak Yavuz] added linear regression --- python/docs/pyspark.ml.rst | 16 + python/pyspark/ml/classification.py | 2 +- .../ml/param/_shared_params_code_gen.py | 69 ++- python/pyspark/ml/param/shared.py | 108 ++++ python/pyspark/ml/regression.py | 520 ++++++++++++++++++ python/run-tests | 2 + 6 files changed, 709 insertions(+), 8 deletions(-) create mode 100644 python/pyspark/ml/regression.py diff --git a/python/docs/pyspark.ml.rst b/python/docs/pyspark.ml.rst index 220ea595df903..a42217a4aee73 100644 --- a/python/docs/pyspark.ml.rst +++ b/python/docs/pyspark.ml.rst @@ -25,6 +25,22 @@ pyspark.ml.classification module :undoc-members: :inherited-members: +pyspark.ml.recommendation module +------------------------- + +.. automodule:: pyspark.ml.recommendation + :members: + :undoc-members: + :inherited-members: + +pyspark.ml.regression module +------------------------- + +.. 
automodule:: pyspark.ml.regression + :members: + :undoc-members: + :inherited-members: + pyspark.ml.tuning module -------------------------------- diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 45754bc9d4b10..8a009c4ac721f 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -91,7 +91,7 @@ class LogisticRegressionModel(JavaModel): globs = globals().copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: - sc = SparkContext("local[2]", "ml.feature tests") + sc = SparkContext("local[2]", "ml.classification tests") sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 3be0979b92013..4a5cc6e64f023 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -38,16 +38,13 @@ # python _shared_params_code_gen.py > shared.py -def _gen_param_code(name, doc, defaultValueStr): +def _gen_param_header(name, doc, defaultValueStr): """ - Generates Python code for a shared param class. + Generates the header part for shared variables :param name: param name :param doc: param doc - :param defaultValueStr: string representation of the default value - :return: code string """ - # TODO: How to correctly inherit instance attributes? template = '''class Has$Name(Params): """ Mixin for param $name: $doc. @@ -61,8 +58,27 @@ def __init__(self): #: param for $doc self.$name = Param(self, "$name", "$doc") if $defaultValueStr is not None: - self._setDefault($name=$defaultValueStr) + self._setDefault($name=$defaultValueStr)''' + + Name = name[0].upper() + name[1:] + return template \ + .replace("$name", name) \ + .replace("$Name", Name) \ + .replace("$doc", doc) \ + .replace("$defaultValueStr", str(defaultValueStr)) + +def _gen_param_code(name, doc, defaultValueStr): + """ + Generates Python code for a shared param class. + + :param name: param name + :param doc: param doc + :param defaultValueStr: string representation of the default value + :return: code string + """ + # TODO: How to correctly inherit instance attributes? + template = ''' def set$Name(self, value): """ Sets the value of :py:attr:`$name`. @@ -104,5 +120,44 @@ def get$Name(self): ("stepSize", "Step size to be used for each iteration of optimization.", None)] code = [] for name, doc, defaultValueStr in shared: - code.append(_gen_param_code(name, doc, defaultValueStr)) + param_code = _gen_param_header(name, doc, defaultValueStr) + code.append(param_code + "\n" + _gen_param_code(name, doc, defaultValueStr)) + + decisionTreeParams = [ + ("maxDepth", "Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; " + + "depth 1 means 1 internal node + 2 leaf nodes."), + ("maxBins", "Max number of bins for" + + " discretizing continuous features. Must be >=2 and >= number of categories for any" + + " categorical feature."), + ("minInstancesPerNode", "Minimum number of instances each child must have after split. " + + "If a split causes the left or right child to have fewer than minInstancesPerNode, the " + + "split will be discarded as invalid. 
Should be >= 1."), + ("minInfoGain", "Minimum information gain for a split to be considered at a tree node."), + ("maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation."), + ("cacheNodeIds", "If false, the algorithm will pass trees to executors to match " + + "instances with nodes. If true, the algorithm will cache node IDs for each instance. " + + "Caching can speed up training of deeper trees.")] + + decisionTreeCode = '''class DecisionTreeParams(Params): + """ + Mixin for Decision Tree parameters. + """ + + # a placeholder to make it appear in the generated doc + $dummyPlaceHolders + + def __init__(self): + super(DecisionTreeParams, self).__init__() + $realParams''' + dtParamMethods = "" + dummyPlaceholders = "" + realParams = "" + paramTemplate = """$name = Param($owner, "$name", "$doc")""" + for name, doc in decisionTreeParams: + variable = paramTemplate.replace("$name", name).replace("$doc", doc) + dummyPlaceholders += variable.replace("$owner", "Params._dummy()") + "\n " + realParams += "self." + variable.replace("$owner", "self") + "\n " + dtParamMethods += _gen_param_code(name, doc, None) + "\n" + code.append(decisionTreeCode.replace("$dummyPlaceHolders", dummyPlaceholders) + .replace("$realParams", realParams) + dtParamMethods) print("\n\n\n".join(code)) diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 4b22322b895b4..779cabe853f8e 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -424,3 +424,111 @@ def getStepSize(self): Gets the value of stepSize or its default value. """ return self.getOrDefault(self.stepSize) + + +class DecisionTreeParams(Params): + """ + Mixin for Decision Tree parameters. + """ + + # a placeholder to make it appear in the generated doc + maxDepth = Param(Params._dummy(), "maxDepth", "Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.") + maxBins = Param(Params._dummy(), "maxBins", "Max number of bins for discretizing continuous features. Must be >=2 and >= number of categories for any categorical feature.") + minInstancesPerNode = Param(Params._dummy(), "minInstancesPerNode", "Minimum number of instances each child must have after split. If a split causes the left or right child to have fewer than minInstancesPerNode, the split will be discarded as invalid. Should be >= 1.") + minInfoGain = Param(Params._dummy(), "minInfoGain", "Minimum information gain for a split to be considered at a tree node.") + maxMemoryInMB = Param(Params._dummy(), "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation.") + cacheNodeIds = Param(Params._dummy(), "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees.") + + def __init__(self): + super(DecisionTreeParams, self).__init__() + #: param for Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. + self.maxDepth = Param(self, "maxDepth", "Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.") + #: param for Max number of bins for discretizing continuous features. Must be >=2 and >= number of categories for any categorical feature. + self.maxBins = Param(self, "maxBins", "Max number of bins for discretizing continuous features. 
Must be >=2 and >= number of categories for any categorical feature.") + #: param for Minimum number of instances each child must have after split. If a split causes the left or right child to have fewer than minInstancesPerNode, the split will be discarded as invalid. Should be >= 1. + self.minInstancesPerNode = Param(self, "minInstancesPerNode", "Minimum number of instances each child must have after split. If a split causes the left or right child to have fewer than minInstancesPerNode, the split will be discarded as invalid. Should be >= 1.") + #: param for Minimum information gain for a split to be considered at a tree node. + self.minInfoGain = Param(self, "minInfoGain", "Minimum information gain for a split to be considered at a tree node.") + #: param for Maximum memory in MB allocated to histogram aggregation. + self.maxMemoryInMB = Param(self, "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation.") + #: param for If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees. + self.cacheNodeIds = Param(self, "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees.") + + def setMaxDepth(self, value): + """ + Sets the value of :py:attr:`maxDepth`. + """ + self.paramMap[self.maxDepth] = value + return self + + def getMaxDepth(self): + """ + Gets the value of maxDepth or its default value. + """ + return self.getOrDefault(self.maxDepth) + + def setMaxBins(self, value): + """ + Sets the value of :py:attr:`maxBins`. + """ + self.paramMap[self.maxBins] = value + return self + + def getMaxBins(self): + """ + Gets the value of maxBins or its default value. + """ + return self.getOrDefault(self.maxBins) + + def setMinInstancesPerNode(self, value): + """ + Sets the value of :py:attr:`minInstancesPerNode`. + """ + self.paramMap[self.minInstancesPerNode] = value + return self + + def getMinInstancesPerNode(self): + """ + Gets the value of minInstancesPerNode or its default value. + """ + return self.getOrDefault(self.minInstancesPerNode) + + def setMinInfoGain(self, value): + """ + Sets the value of :py:attr:`minInfoGain`. + """ + self.paramMap[self.minInfoGain] = value + return self + + def getMinInfoGain(self): + """ + Gets the value of minInfoGain or its default value. + """ + return self.getOrDefault(self.minInfoGain) + + def setMaxMemoryInMB(self, value): + """ + Sets the value of :py:attr:`maxMemoryInMB`. + """ + self.paramMap[self.maxMemoryInMB] = value + return self + + def getMaxMemoryInMB(self): + """ + Gets the value of maxMemoryInMB or its default value. + """ + return self.getOrDefault(self.maxMemoryInMB) + + def setCacheNodeIds(self, value): + """ + Sets the value of :py:attr:`cacheNodeIds`. + """ + self.paramMap[self.cacheNodeIds] = value + return self + + def getCacheNodeIds(self): + """ + Gets the value of cacheNodeIds or its default value. + """ + return self.getOrDefault(self.cacheNodeIds) + diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py new file mode 100644 index 0000000000000..0ab5c6c3d20c3 --- /dev/null +++ b/python/pyspark/ml/regression.py @@ -0,0 +1,520 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.ml.util import keyword_only +from pyspark.ml.wrapper import JavaEstimator, JavaModel +from pyspark.ml.param.shared import * +from pyspark.mllib.common import inherit_doc + + +__all__ = ['DecisionTreeRegressor', 'DecisionTreeRegressionModel', 'GBTRegressor', + 'GBTRegressionModel', 'LinearRegression', 'LinearRegressionModel', + 'RandomForestRegressor', 'RandomForestRegressionModel'] + + +@inherit_doc +class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, + HasRegParam, HasTol): + """ + Linear regression. + + The learning objective is to minimize the squared error, with regularization. + The specific squared error loss function used is: + L = 1/2n ||A weights - y||^2^ + + This support multiple types of regularization: + - none (a.k.a. ordinary least squares) + - L2 (ridge regression) + - L1 (Lasso) + - L2 + L1 (elastic net) + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> lr = LinearRegression(maxIter=5, regParam=0.0) + >>> model = lr.fit(df) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + -1.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + >>> lr.setParams("vector") + Traceback (most recent call last): + ... + TypeError: Method setParams forces keyword arguments. + """ + _java_class = "org.apache.spark.ml.regression.LinearRegression" + # a placeholder to make it appear in the generated doc + elasticNetParam = \ + Param(Params._dummy(), "elasticNetParam", + "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " + + "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.") + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6) + """ + super(LinearRegression, self).__init__() + #: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty + # is an L2 penalty. For alpha = 1, it is an L1 penalty. + self.elasticNetParam = \ + Param(self, "elasticNetParam", + "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " + + "is an L2 penalty. 
For alpha = 1, it is an L1 penalty.") + self._setDefault(maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6) + Sets params for linear regression. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return LinearRegressionModel(java_model) + + def setElasticNetParam(self, value): + """ + Sets the value of :py:attr:`elasticNetParam`. + """ + self.paramMap[self.elasticNetParam] = value + return self + + def getElasticNetParam(self): + """ + Gets the value of elasticNetParam or its default value. + """ + return self.getOrDefault(self.elasticNetParam) + + +class LinearRegressionModel(JavaModel): + """ + Model fitted by LinearRegression. + """ + + +class TreeRegressorParams(object): + """ + Private class to track supported impurity measures. + """ + supportedImpurities = ["variance"] + + +class RandomForestParams(object): + """ + Private class to track supported random forest parameters. + """ + supportedFeatureSubsetStrategies = ["auto", "all", "onethird", "sqrt", "log2"] + + +class GBTParams(object): + """ + Private class to track supported GBT params. + """ + supportedLossTypes = ["squared", "absolute"] + + +@inherit_doc +class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree` + learning algorithm for regression. + It supports both continuous and categorical features. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> dt = DecisionTreeRegressor(maxDepth=2) + >>> model = dt.fit(df) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + _java_class = "org.apache.spark.ml.regression.DecisionTreeRegressor" + # a placeholder to make it appear in the generated doc + impurity = Param(Params._dummy(), "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeRegressorParams.supportedImpurities)) + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance"): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance") + """ + super(DecisionTreeRegressor, self).__init__() + #: param for Criterion used for information gain calculation (case-insensitive). + self.impurity = \ + Param(self, "impurity", + "Criterion used for information gain calculation (case-insensitive). 
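For reference, the objective that LinearRegression above minimizes combines the 1/(2n) squared error from the class docstring with an elastic-net penalty controlled by regParam and elasticNetParam. The NumPy sketch below spells that out; the variable names are mine and the penalty scaling follows the usual glmnet-style convention, so treat it as an illustration rather than a statement of the exact implementation.

import numpy as np

def elastic_net_objective(w, X, y, reg_param, alpha):
    # Squared error term, scaled by 1/(2n) as in the docstring above.
    n = X.shape[0]
    squared_error = 0.5 / n * np.sum((X.dot(w) - y) ** 2)
    # alpha plays the role of elasticNetParam: 0 gives a pure L2 (ridge) penalty,
    # 1 gives a pure L1 (lasso) penalty, and values in between mix the two.
    penalty = reg_param * (alpha * np.sum(np.abs(w)) + (1.0 - alpha) * 0.5 * np.sum(w ** 2))
    return squared_error + penalty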
" + + "Supported options: " + ", ".join(TreeRegressorParams.supportedImpurities)) + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="variance") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="variance"): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="variance") + Sets params for the DecisionTreeRegressor. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return DecisionTreeRegressionModel(java_model) + + def setImpurity(self, value): + """ + Sets the value of :py:attr:`impurity`. + """ + self.paramMap[self.impurity] = value + return self + + def getImpurity(self): + """ + Gets the value of impurity or its default value. + """ + return self.getOrDefault(self.impurity) + + +class DecisionTreeRegressionModel(JavaModel): + """ + Model fitted by DecisionTreeRegressor. + """ + + +@inherit_doc +class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Random_forest Random Forest` + learning algorithm for regression. + It supports both continuous and categorical features. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> rf = RandomForestRegressor(numTrees=2, maxDepth=2) + >>> model = rf.fit(df) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 0.5 + """ + + _java_class = "org.apache.spark.ml.regression.RandomForestRegressor" + # a placeholder to make it appear in the generated doc + impurity = Param(Params._dummy(), "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeRegressorParams.supportedImpurities)) + subsamplingRate = Param(Params._dummy(), "subsamplingRate", + "Fraction of the training data used for learning each decision tree, " + + "in range (0, 1].") + numTrees = Param(Params._dummy(), "numTrees", "Number of trees to train (>= 1)") + featureSubsetStrategy = \ + Param(Params._dummy(), "featureSubsetStrategy", + "The number of features to consider for splits at each tree node. 
Supported " + + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance", + numTrees=20, featureSubsetStrategy="auto", seed=42): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance", + numTrees=20, featureSubsetStrategy="auto", seed=42) + """ + super(RandomForestRegressor, self).__init__() + #: param for Criterion used for information gain calculation (case-insensitive). + self.impurity = \ + Param(self, "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeRegressorParams.supportedImpurities)) + #: param for Fraction of the training data used for learning each decision tree, + # in range (0, 1] + self.subsamplingRate = Param(self, "subsamplingRate", + "Fraction of the training data used for learning each " + + "decision tree, in range (0, 1].") + #: param for Number of trees to train (>= 1) + self.numTrees = Param(self, "numTrees", "Number of trees to train (>= 1)") + #: param for The number of features to consider for splits at each tree node + self.featureSubsetStrategy = \ + Param(self, "featureSubsetStrategy", + "The number of features to consider for splits at each tree node. Supported " + + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + impurity="variance", numTrees=20, featureSubsetStrategy="auto") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + impurity="variance", numTrees=20, featureSubsetStrategy="auto"): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + impurity="variance", numTrees=20, featureSubsetStrategy="auto") + Sets params for linear regression. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return RandomForestRegressionModel(java_model) + + def setImpurity(self, value): + """ + Sets the value of :py:attr:`impurity`. + """ + self.paramMap[self.impurity] = value + return self + + def getImpurity(self): + """ + Gets the value of impurity or its default value. + """ + return self.getOrDefault(self.impurity) + + def setSubsamplingRate(self, value): + """ + Sets the value of :py:attr:`subsamplingRate`. + """ + self.paramMap[self.subsamplingRate] = value + return self + + def getSubsamplingRate(self): + """ + Gets the value of subsamplingRate or its default value. + """ + return self.getOrDefault(self.subsamplingRate) + + def setNumTrees(self, value): + """ + Sets the value of :py:attr:`numTrees`. 
+ """ + self.paramMap[self.numTrees] = value + return self + + def getNumTrees(self): + """ + Gets the value of numTrees or its default value. + """ + return self.getOrDefault(self.numTrees) + + def setFeatureSubsetStrategy(self, value): + """ + Sets the value of :py:attr:`featureSubsetStrategy`. + """ + self.paramMap[self.featureSubsetStrategy] = value + return self + + def getFeatureSubsetStrategy(self): + """ + Gets the value of featureSubsetStrategy or its default value. + """ + return self.getOrDefault(self.featureSubsetStrategy) + + +class RandomForestRegressionModel(JavaModel): + """ + Model fitted by RandomForestRegressor. + """ + + +@inherit_doc +class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)` + learning algorithm for regression. + It supports both continuous and categorical features. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> gbt = GBTRegressor(maxIter=5, maxDepth=2) + >>> model = gbt.fit(df) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + _java_class = "org.apache.spark.ml.regression.GBTRegressor" + # a placeholder to make it appear in the generated doc + lossType = Param(Params._dummy(), "lossType", + "Loss function which GBT tries to minimize (case-insensitive). " + + "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) + subsamplingRate = Param(Params._dummy(), "subsamplingRate", + "Fraction of the training data used for learning each decision tree, " + + "in range (0, 1].") + stepSize = Param(Params._dummy(), "stepSize", + "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the " + + "contribution of each estimator") + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="squared", + maxIter=20, stepSize=0.1): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="squared", + maxIter=20, stepSize=0.1) + """ + super(GBTRegressor, self).__init__() + #: param for Loss function which GBT tries to minimize (case-insensitive). + self.lossType = Param(self, "lossType", + "Loss function which GBT tries to minimize (case-insensitive). " + + "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) + #: Fraction of the training data used for learning each decision tree, in range (0, 1]. + self.subsamplingRate = Param(self, "subsamplingRate", + "Fraction of the training data used for learning each " + + "decision tree, in range (0, 1].") + #: Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the contribution of + # each estimator + self.stepSize = Param(self, "stepSize", + "Step size (a.k.a. 
learning rate) in interval (0, 1] for shrinking " + + "the contribution of each estimator") + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="squared", maxIter=20, stepSize=0.1) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="squared", maxIter=20, stepSize=0.1): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="squared", maxIter=20, stepSize=0.1) + Sets params for Gradient Boosted Tree Regression. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return GBTRegressionModel(java_model) + + def setLossType(self, value): + """ + Sets the value of :py:attr:`lossType`. + """ + self.paramMap[self.lossType] = value + return self + + def getLossType(self): + """ + Gets the value of lossType or its default value. + """ + return self.getOrDefault(self.lossType) + + def setSubsamplingRate(self, value): + """ + Sets the value of :py:attr:`subsamplingRate`. + """ + self.paramMap[self.subsamplingRate] = value + return self + + def getSubsamplingRate(self): + """ + Gets the value of subsamplingRate or its default value. + """ + return self.getOrDefault(self.subsamplingRate) + + def setStepSize(self, value): + """ + Sets the value of :py:attr:`stepSize`. + """ + self.paramMap[self.stepSize] = value + return self + + def getStepSize(self): + """ + Gets the value of stepSize or its default value. + """ + return self.getOrDefault(self.stepSize) + + +class GBTRegressionModel(JavaModel): + """ + Model fitted by GBTRegressor. + """ + + +if __name__ == "__main__": + import doctest + from pyspark.context import SparkContext + from pyspark.sql import SQLContext + globs = globals().copy() + # The small batch size here ensures that we see multiple batches, + # even in these small test examples: + sc = SparkContext("local[2]", "ml.regression tests") + sqlContext = SQLContext(sc) + globs['sc'] = sc + globs['sqlContext'] = sqlContext + (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + sc.stop() + if failure_count: + exit(-1) diff --git a/python/run-tests b/python/run-tests index f9ca26467f17e..f2757a3967e81 100755 --- a/python/run-tests +++ b/python/run-tests @@ -98,6 +98,8 @@ function run_ml_tests() { echo "Run ml tests ..." run_test "pyspark/ml/feature.py" run_test "pyspark/ml/classification.py" + run_test "pyspark/ml/recommendation.py" + run_test "pyspark/ml/regression.py" run_test "pyspark/ml/tuning.py" run_test "pyspark/ml/tests.py" run_test "pyspark/ml/evaluation.py" From 5438f49ccf374fed16bc2b7fc1556e4c0095b14c Mon Sep 17 00:00:00 2001 From: Tim Ellison <t.p.ellison@gmail.com> Date: Tue, 12 May 2015 20:48:26 +0100 Subject: [PATCH 115/320] =?UTF-8?q?[SPARK-2018]=20[CORE]=20Upgrade=20LZF?= =?UTF-8?q?=20library=20to=20fix=20endian=20serialization=20p=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …roblem Pick up newer version of dependency with fix for SPARK-2018. 
The update involved patching the ning/compress LZF library to handle big endian systems correctly. Credit goes to gireeshpunathil for diagnosing the problem, and cowtowncoder for fixing it. Spark tests run clean for me. Author: Tim Ellison <t.p.ellison@gmail.com> Closes #6077 from tellison/UpgradeLZF and squashes the following commits: ad8d4ef [Tim Ellison] [SPARK-2018] [CORE] Upgrade LZF library to fix endian serialization problem --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 94cf28332cf47..cf9279ea5a2a6 100644 --- a/pom.xml +++ b/pom.xml @@ -481,7 +481,7 @@ <dependency> <groupId>com.ning</groupId> <artifactId>compress-lzf</artifactId> - <version>1.0.0</version> + <version>1.0.3</version> </dependency> <dependency> <groupId>org.xerial.snappy</groupId> From 595a67589a42f8025d3e5fd4da413b1faa2e14bf Mon Sep 17 00:00:00 2001 From: Ram Sriharsha <rsriharsha@hw11853.local> Date: Tue, 12 May 2015 13:35:12 -0700 Subject: [PATCH 116/320] [SPARK-7015] [MLLIB] [WIP] Multiclass to Binary Reduction: One Against All initial cut of one against all. test code is a scaffolding , not fully implemented. This WIP is to gather early feedback. Author: Ram Sriharsha <rsriharsha@hw11853.local> Closes #5830 from harsha2010/reduction and squashes the following commits: 5f4b495 [Ram Sriharsha] Fix Test 386e98b [Ram Sriharsha] Style fix 49b4a17 [Ram Sriharsha] Simplify the test 02279cc [Ram Sriharsha] Output Label Metadata in Prediction Col bc78032 [Ram Sriharsha] Code Review Updates 8ce4845 [Ram Sriharsha] Merge with Master 2a807be [Ram Sriharsha] Merge branch 'master' into reduction e21bfcc [Ram Sriharsha] Style Fix 5614f23 [Ram Sriharsha] Style Fix c75583a [Ram Sriharsha] Cleanup 7a5f136 [Ram Sriharsha] Fix TODOs 804826b [Ram Sriharsha] Merge with Master 1448a5f [Ram Sriharsha] Style Fix 6e47807 [Ram Sriharsha] Style Fix d63e46b [Ram Sriharsha] Incorporate Code Review Feedback ced68b5 [Ram Sriharsha] Refactor OneVsAll to implement Predictor 78fa82a [Ram Sriharsha] extra line 0dfa1fb [Ram Sriharsha] Fix inexhaustive match cases that may arise from UnresolvedAttribute a59a4f4 [Ram Sriharsha] @Experimental 4167234 [Ram Sriharsha] Merge branch 'master' into reduction 868a4fd [Ram Sriharsha] @Experimental 041d905 [Ram Sriharsha] Code Review Fixes df188d8 [Ram Sriharsha] Style fix 612ec48 [Ram Sriharsha] Style Fix 6ef43d3 [Ram Sriharsha] Prefer Unresolved Attribute to Option: Java APIs are cleaner 6bf6bff [Ram Sriharsha] Update OneHotEncoder to new API e29cb89 [Ram Sriharsha] Merge branch 'master' into reduction 1c7fa44 [Ram Sriharsha] Fix Tests ca83672 [Ram Sriharsha] Incorporate Code Review Feedback + Rename to OneVsRestClassifier 221beeed [Ram Sriharsha] Upgrade to use Copy method for cloning Base Classifiers 26f1ddb [Ram Sriharsha] Merge with SPARK-5956 API changes 9738744 [Ram Sriharsha] Merge branch 'master' into reduction 1a3e375 [Ram Sriharsha] More efficient Implementation: Use withColumn to generate label column dynamically 32e0189 [Ram Sriharsha] Restrict reduction to Margin Based Classifiers ff272da [Ram Sriharsha] Style fix 28771f5 [Ram Sriharsha] Add Tests for Multiclass to Binary Reduction b60f874 [Ram Sriharsha] Fix Style issues in Test 3191cdf [Ram Sriharsha] Remove this test, accidental commit 23f056c [Ram Sriharsha] Fix Headers for test 1b5e929 [Ram Sriharsha] Fix Style issues and add Header 8752863 [Ram Sriharsha] [SPARK-7015][MLLib][WIP] Multiclass to Binary Reduction: One Against All --- .../scala/org/apache/spark/ml/Predictor.scala | 3 +- 
.../spark/ml/attribute/AttributeGroup.scala | 1 + .../spark/ml/attribute/AttributeType.scala | 8 + .../spark/ml/attribute/attributes.scala | 37 ++- .../spark/ml/feature/VectorIndexer.scala | 4 +- .../apache/spark/ml/reduction/OneVsRest.scala | 211 ++++++++++++++++++ .../apache/spark/ml/util/MetadataUtils.scala | 7 +- .../ml/reduction/JavaOneVsRestSuite.java | 85 +++++++ .../spark/ml/attribute/AttributeSuite.scala | 10 +- .../spark/ml/reduction/OneVsRestSuite.scala | 113 ++++++++++ 10 files changed, 471 insertions(+), 8 deletions(-) create mode 100644 mllib/src/main/scala/org/apache/spark/ml/reduction/OneVsRest.scala create mode 100644 mllib/src/test/java/org/apache/spark/ml/reduction/JavaOneVsRestSuite.java create mode 100644 mllib/src/test/scala/org/apache/spark/ml/reduction/OneVsRestSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala index 0e53877de92db..f6a5f27425d1f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala @@ -113,7 +113,8 @@ abstract class Predictor[ * * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector. */ - protected def featuresDataType: DataType = new VectorUDT + @DeveloperApi + private[ml] def featuresDataType: DataType = new VectorUDT override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema, fitting = true, featuresDataType) diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala index d7dee8fed2a55..f5f37aa77929c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala @@ -123,6 +123,7 @@ class AttributeGroup private ( nominalMetadata += nominal.toMetadataImpl(withType = false) case binary: BinaryAttribute => binaryMetadata += binary.toMetadataImpl(withType = false) + case UnresolvedAttribute => } val attrBldr = new MetadataBuilder if (numericMetadata.nonEmpty) { diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala index 65e7e43d5a5b0..a83febd7de2cc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala @@ -43,6 +43,12 @@ object AttributeType { Binary } + /** Unresolved type. */ + val Unresolved: AttributeType = { + case object Unresolved extends AttributeType("unresolved") + Unresolved + } + /** * Gets the [[AttributeType]] object from its name. 
* @param name attribute type name: "numeric", "nominal", or "binary" @@ -54,6 +60,8 @@ object AttributeType { Nominal } else if (name == Binary.name) { Binary + } else if (name == Unresolved.name) { + Unresolved } else { throw new IllegalArgumentException(s"Cannot recognize type $name.") } diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala index 5717d6ec2eaec..e8f7f152784a1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala @@ -125,7 +125,13 @@ private[attribute] trait AttributeFactory { */ def fromStructField(field: StructField): Attribute = { require(field.dataType == DoubleType) - fromMetadata(field.metadata.getMetadata(AttributeKeys.ML_ATTR)).withName(field.name) + val metadata = field.metadata + val mlAttr = AttributeKeys.ML_ATTR + if (metadata.contains(mlAttr)) { + fromMetadata(metadata.getMetadata(mlAttr)).withName(field.name) + } else { + UnresolvedAttribute + } } } @@ -535,3 +541,32 @@ object BinaryAttribute extends AttributeFactory { new BinaryAttribute(name, index, values) } } + +/** + * An unresolved attribute. + */ +object UnresolvedAttribute extends Attribute { + + override def attrType: AttributeType = AttributeType.Unresolved + + override def withIndex(index: Int): Attribute = this + + override def isNumeric: Boolean = false + + override def withoutIndex: Attribute = this + + override def isNominal: Boolean = false + + override def name: Option[String] = None + + override private[attribute] def toMetadataImpl(withType: Boolean): Metadata = { + Metadata.empty + } + + override def withoutName: Attribute = this + + override def index: Option[Int] = None + + override def withName(name: String): Attribute = this + +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 07ea579d69893..2e6313ac14485 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.{Estimator, Model} -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, BinaryAttribute, NominalAttribute, NumericAttribute} +import org.apache.spark.ml.attribute._ import org.apache.spark.ml.param.{IntParam, ParamValidators, Params} import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util.SchemaUtils @@ -375,6 +375,8 @@ class VectorIndexerModel private[ml] ( } case (origAttr: Attribute, featAttr: NumericAttribute) => origAttr.withIndex(featAttr.index.get) + case (origAttr: Attribute, _) => + origAttr } } else { partialFeatureAttributes diff --git a/mllib/src/main/scala/org/apache/spark/ml/reduction/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/reduction/OneVsRest.scala new file mode 100644 index 0000000000000..0a6728ef1f779 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/reduction/OneVsRest.scala @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.reduction + +import java.util.UUID + +import scala.language.existentials + +import org.apache.spark.annotation.{AlphaComponent, Experimental} +import org.apache.spark.ml._ +import org.apache.spark.ml.attribute._ +import org.apache.spark.ml.classification.{ClassificationModel, Classifier} +import org.apache.spark.ml.param.Param +import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ +import org.apache.spark.storage.StorageLevel + +/** + * Params for [[OneVsRest]]. + */ +private[ml] trait OneVsRestParams extends PredictorParams { + + type ClassifierType = Classifier[F, E, M] forSome { + type F + type M <: ClassificationModel[F, M] + type E <: Classifier[F, E, M] + } + + /** + * param for the base binary classifier that we reduce multiclass classification into. + * @group param + */ + val classifier: Param[ClassifierType] = + new Param(this, "classifier", "base binary classifier ") + + /** @group getParam */ + def getClassifier: ClassifierType = $(classifier) + +} + +/** + * Model produced by [[OneVsRest]]. + * Stores the models resulting from training k different classifiers: + * one for each class. + * Each example is scored against all k models and the model with highest score + * is picked to label the example. + * TODO: API may need to change when we introduce a ClassificationModel trait as the public API + * @param parent + * @param labelMetadata Metadata of label column if it exists, or Nominal attribute + * representing the number of classes in training dataset otherwise. + * @param models the binary classification models for reduction. + * The i-th model is produced by testing the i-th class vs the rest. + */ +@AlphaComponent +class OneVsRestModel( + override val parent: OneVsRest, + labelMetadata: Metadata, + val models: Array[_ <: ClassificationModel[_,_]]) + extends Model[OneVsRestModel] with OneVsRestParams { + + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema, fitting = false, getClassifier.featuresDataType) + } + + override def transform(dataset: DataFrame): DataFrame = { + // Check schema + transformSchema(dataset.schema, logging = true) + + // determine the input columns: these need to be passed through + val origCols = dataset.schema.map(f => col(f.name)) + + // add an accumulator column to store predictions of all the models + val accColName = "mbc$acc" + UUID.randomUUID().toString + val init: () => Map[Int, Double] = () => {Map()} + val mapType = MapType(IntegerType, DoubleType, false) + val newDataset = dataset.withColumn(accColName, callUDF(init, mapType)) + + // persist if underlying dataset is not persistent. 
+ val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + if (handlePersistence) { + newDataset.persist(StorageLevel.MEMORY_AND_DISK) + } + + // update the accumulator column with the result of prediction of models + val aggregatedDataset = models.zipWithIndex.foldLeft[DataFrame](newDataset) { + case (df, (model, index)) => { + val rawPredictionCol = model.getRawPredictionCol + val columns = origCols ++ List(col(rawPredictionCol), col(accColName)) + + // add temporary column to store intermediate scores and update + val tmpColName = "mbc$tmp" + UUID.randomUUID().toString + val update: (Map[Int, Double], Vector) => Map[Int, Double] = + (predictions: Map[Int, Double], prediction: Vector) => { + predictions + ((index, prediction(1))) + } + val updateUdf = callUDF(update, mapType, col(accColName), col(rawPredictionCol)) + val transformedDataset = model.transform(df).select(columns:_*) + val updatedDataset = transformedDataset.withColumn(tmpColName, updateUdf) + val newColumns = origCols ++ List(col(tmpColName)) + + // switch out the intermediate column with the accumulator column + updatedDataset.select(newColumns:_*).withColumnRenamed(tmpColName, accColName) + } + } + + if (handlePersistence) { + newDataset.unpersist() + } + + // output the index of the classifier with highest confidence as prediction + val label: Map[Int, Double] => Double = (predictions: Map[Int, Double]) => { + predictions.maxBy(_._2)._1.toDouble + } + + // output label and label metadata as prediction + val labelUdf = callUDF(label, DoubleType, col(accColName)) + aggregatedDataset.withColumn($(predictionCol), labelUdf.as($(predictionCol), labelMetadata)) + } +} + +/** + * :: Experimental :: + * + * Reduction of Multiclass Classification to Binary Classification. + * Performs reduction using one against all strategy. + * For a multiclass classification with k classes, train k models (one per class). + * Each example is scored against all k models and the model with highest score + * is picked to label the example. + */ +@Experimental +final class OneVsRest extends Estimator[OneVsRestModel] with OneVsRestParams { + + /** @group setParam */ + // TODO: Find a better way to do this. Existential Types don't work with Java API so cast needed. + def setClassifier(value: Classifier[_,_,_]): this.type = { + set(classifier, value.asInstanceOf[ClassifierType]) + } + + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema, fitting = true, getClassifier.featuresDataType) + } + + override def fit(dataset: DataFrame): OneVsRestModel = { + // determine number of classes either from metadata if provided, or via computation. + val labelSchema = dataset.schema($(labelCol)) + val computeNumClasses: () => Int = () => { + val Row(maxLabelIndex: Double) = dataset.agg(max($(labelCol))).head() + // classes are assumed to be numbered from 0,...,maxLabelIndex + maxLabelIndex.toInt + 1 + } + val numClasses = MetadataUtils.getNumClasses(labelSchema).fold(computeNumClasses())(identity) + + val multiclassLabeled = dataset.select($(labelCol), $(featuresCol)) + + // persist if underlying dataset is not persistent. + val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + if (handlePersistence) { + multiclassLabeled.persist(StorageLevel.MEMORY_AND_DISK) + } + + // create k columns, one for each binary classifier. 
+ val models = Range(0, numClasses).par.map { index => + + val label: Double => Double = (label: Double) => { + if (label.toInt == index) 1.0 else 0.0 + } + + // generate new label metadata for the binary problem. + // TODO: use when ... otherwise after SPARK-7321 is merged + val labelUDF = callUDF(label, DoubleType, col($(labelCol))) + val newLabelMeta = BinaryAttribute.defaultAttr.withName("label").toMetadata() + val labelColName = "mc2b$" + index + val labelUDFWithNewMeta = labelUDF.as(labelColName, newLabelMeta) + val trainingDataset = multiclassLabeled.withColumn(labelColName, labelUDFWithNewMeta) + val classifier = getClassifier + classifier.fit(trainingDataset, classifier.labelCol -> labelColName) + }.toArray[ClassificationModel[_,_]] + + if (handlePersistence) { + multiclassLabeled.unpersist() + } + + // extract label metadata from label column if present, or create a nominal attribute + // to output the number of labels + val labelAttribute = Attribute.fromStructField(labelSchema) match { + case _: NumericAttribute | UnresolvedAttribute => { + NominalAttribute.defaultAttr.withName("label").withNumValues(numClasses) + } + case attr: Attribute => attr + } + copyValues(new OneVsRestModel(this, labelAttribute.toMetadata(), models)) + } +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala index c84c8b4eb744f..56075c9a6b39f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala @@ -20,8 +20,7 @@ package org.apache.spark.ml.util import scala.collection.immutable.HashMap import org.apache.spark.annotation.Experimental -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, BinaryAttribute, NominalAttribute, - NumericAttribute} +import org.apache.spark.ml.attribute._ import org.apache.spark.sql.types.StructField @@ -39,9 +38,9 @@ object MetadataUtils { */ def getNumClasses(labelSchema: StructField): Option[Int] = { Attribute.fromStructField(labelSchema) match { - case numAttr: NumericAttribute => None case binAttr: BinaryAttribute => Some(2) case nomAttr: NominalAttribute => nomAttr.getNumValues + case _: NumericAttribute | UnresolvedAttribute => None } } @@ -65,7 +64,7 @@ object MetadataUtils { Iterator() } else { attr match { - case numAttr: NumericAttribute => Iterator() + case _: NumericAttribute | UnresolvedAttribute => Iterator() case binAttr: BinaryAttribute => Iterator(idx -> 2) case nomAttr: NominalAttribute => nomAttr.getNumValues match { diff --git a/mllib/src/test/java/org/apache/spark/ml/reduction/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/reduction/JavaOneVsRestSuite.java new file mode 100644 index 0000000000000..40a90ae9ded60 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/reduction/JavaOneVsRestSuite.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.reduction; + +import java.io.Serializable; +import java.util.List; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import static scala.collection.JavaConversions.seqAsJavaList; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.classification.LogisticRegression; +import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateMultinomialLogisticInput; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.SQLContext; + +public class JavaOneVsRestSuite implements Serializable { + + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + private transient DataFrame dataset; + private transient JavaRDD<LabeledPoint> datasetRDD; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaLOneVsRestSuite"); + jsql = new SQLContext(jsc); + int nPoints = 3; + + /** + * The following weights and xMean/xVariance are computed from iris dataset with lambda = 0.2. + * As a result, we are actually drawing samples from probability distribution of built model. + */ + double[] weights = { + -0.57997, 0.912083, -0.371077, -0.819866, 2.688191, + -0.16624, -0.84355, -0.048509, -0.301789, 4.170682 }; + + double[] xMean = {5.843, 3.057, 3.758, 1.199}; + double[] xVariance = {0.6856, 0.1899, 3.116, 0.581}; + List<LabeledPoint> points = seqAsJavaList(generateMultinomialLogisticInput( + weights, xMean, xVariance, true, nPoints, 42)); + datasetRDD = jsc.parallelize(points, 2); + dataset = jsql.createDataFrame(datasetRDD, LabeledPoint.class); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void oneVsRestDefaultParams() { + OneVsRest ova = new OneVsRest(); + ova.setClassifier(new LogisticRegression()); + Assert.assertEquals(ova.getLabelCol() , "label"); + Assert.assertEquals(ova.getPredictionCol() , "prediction"); + OneVsRestModel ovaModel = ova.fit(dataset); + DataFrame predictions = ovaModel.transform(dataset).select("label", "prediction"); + predictions.collectAsList(); + Assert.assertEquals(ovaModel.getLabelCol(), "label"); + Assert.assertEquals(ovaModel.getPredictionCol() , "prediction"); + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/attribute/AttributeSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/attribute/AttributeSuite.scala index 3e1a7196e37cb..ec9b717e41ce8 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/attribute/AttributeSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/attribute/AttributeSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.attribute import org.scalatest.FunSuite -import org.apache.spark.sql.types.{DoubleType, MetadataBuilder, Metadata} +import org.apache.spark.sql.types._ class AttributeSuite extends FunSuite { @@ -209,4 +209,12 @@ class AttributeSuite extends FunSuite { intercept[IllegalArgumentException](attr.withName("")) intercept[IllegalArgumentException](attr.withIndex(-1)) } + + 
test("attribute from struct field") { + val metadata = NumericAttribute.defaultAttr.withName("label").toMetadata() + val fldWithoutMeta = new StructField("x", DoubleType, false, Metadata.empty) + assert(Attribute.fromStructField(fldWithoutMeta) == UnresolvedAttribute) + val fldWithMeta = new StructField("x", DoubleType, false, metadata) + assert(Attribute.fromStructField(fldWithMeta).isNumeric) + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/reduction/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/reduction/OneVsRestSuite.scala new file mode 100644 index 0000000000000..ebec7c68e8144 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/reduction/OneVsRestSuite.scala @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.reduction + +import org.scalatest.FunSuite + +import org.apache.spark.ml.attribute.NominalAttribute +import org.apache.spark.ml.classification.{LogisticRegressionModel, LogisticRegression} +import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.mllib.classification.LogisticRegressionSuite._ +import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS +import org.apache.spark.mllib.evaluation.MulticlassMetrics +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{DataFrame, SQLContext} + +class OneVsRestSuite extends FunSuite with MLlibTestSparkContext { + + @transient var sqlContext: SQLContext = _ + @transient var dataset: DataFrame = _ + @transient var rdd: RDD[LabeledPoint] = _ + + override def beforeAll(): Unit = { + super.beforeAll() + sqlContext = new SQLContext(sc) + val nPoints = 1000 + + /** + * The following weights and xMean/xVariance are computed from iris dataset with lambda = 0.2. + * As a result, we are actually drawing samples from probability distribution of built model. 
+ */ + val weights = Array( + -0.57997, 0.912083, -0.371077, -0.819866, 2.688191, + -0.16624, -0.84355, -0.048509, -0.301789, 4.170682) + + val xMean = Array(5.843, 3.057, 3.758, 1.199) + val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) + rdd = sc.parallelize(generateMultinomialLogisticInput( + weights, xMean, xVariance, true, nPoints, 42), 2) + dataset = sqlContext.createDataFrame(rdd) + } + + test("one-vs-rest: default params") { + val numClasses = 3 + val ova = new OneVsRest() + ova.setClassifier(new LogisticRegression) + assert(ova.getLabelCol === "label") + assert(ova.getPredictionCol === "prediction") + val ovaModel = ova.fit(dataset) + assert(ovaModel.models.size === numClasses) + val transformedDataset = ovaModel.transform(dataset) + + // check for label metadata in prediction col + val predictionColSchema = transformedDataset.schema(ovaModel.getPredictionCol) + assert(MetadataUtils.getNumClasses(predictionColSchema) === Some(3)) + + val ovaResults = transformedDataset + .select("prediction", "label") + .map(row => (row.getDouble(0), row.getDouble(1))) + + val lr = new LogisticRegressionWithLBFGS().setIntercept(true).setNumClasses(numClasses) + lr.optimizer.setRegParam(0.1).setNumIterations(100) + + val model = lr.run(rdd) + val results = model.predict(rdd.map(_.features)).zip(rdd.map(_.label)) + // determine the #confusion matrix in each class. + // bound how much error we allow compared to multinomial logistic regression. + val expectedMetrics = new MulticlassMetrics(results) + val ovaMetrics = new MulticlassMetrics(ovaResults) + assert(expectedMetrics.confusionMatrix ~== ovaMetrics.confusionMatrix absTol 400) + } + + test("one-vs-rest: pass label metadata correctly during train") { + val numClasses = 3 + val ova = new OneVsRest() + ova.setClassifier(new MockLogisticRegression) + + val labelMetadata = NominalAttribute.defaultAttr.withName("label").withNumValues(numClasses) + val labelWithMetadata = dataset("label").as("label", labelMetadata.toMetadata()) + val features = dataset("features").as("features") + val datasetWithLabelMetadata = dataset.select(labelWithMetadata, features) + ova.fit(datasetWithLabelMetadata) + } +} + +private class MockLogisticRegression extends LogisticRegression { + + setMaxIter(1) + + override protected def train(dataset: DataFrame): LogisticRegressionModel = { + val labelSchema = dataset.schema($(labelCol)) + // check for label attribute propagation. 
+ assert(MetadataUtils.getNumClasses(labelSchema).forall(_ == 2)) + super.train(dataset) + } +} From 2a41c0d71a13558f12c6811bf98791e01186f3ad Mon Sep 17 00:00:00 2001 From: Michael Armbrust <michael@databricks.com> Date: Tue, 12 May 2015 13:36:55 -0700 Subject: [PATCH 117/320] [SPARK-7569][SQL] Better error for invalid binary expressions `scala> Seq((1,1)).toDF("a", "b").select(lit(1) + new java.sql.Date(1)) ` Before: ``` org.apache.spark.sql.AnalysisException: invalid expression (1 + 0) between Literal 1, IntegerType and Literal 0, DateType; ``` After: ``` org.apache.spark.sql.AnalysisException: invalid expression (1 + 0) between int and date; ``` Author: Michael Armbrust <michael@databricks.com> Closes #6089 from marmbrus/betterBinaryError and squashes the following commits: 23b68ad [Michael Armbrust] [SPARK-7569][SQL] Better error for invalid binary expressions --- .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c8288c6767004..f104e742c90fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -69,7 +69,7 @@ trait CheckAnalysis { case b: BinaryExpression if !b.resolved => failAnalysis( s"invalid expression ${b.prettyString} " + - s"between ${b.left.simpleString} and ${b.right.simpleString}") + s"between ${b.left.dataType.simpleString} and ${b.right.dataType.simpleString}") case w @ WindowExpression(windowFunction, windowSpec) if windowSpec.validate.nonEmpty => // The window spec is not valid. From 23b9863e2aa7ecd0c4fa3aa8a59fdae09b4fe1d7 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Tue, 12 May 2015 14:24:26 -0700 Subject: [PATCH 118/320] [SPARK-7559] [MLLIB] Bucketizer should include the right most boundary in the last bucket. We make special treatment for +inf in `Bucketizer`. This could be simplified by always including the largest split value in the last bucket. E.g., (x1, x2, x3) defines buckets [x1, x2) and [x2, x3]. This shouldn't affect user code much, and there are applications that need to include the right-most value. For example, we can bucketize ratings from 0 to 10 to bad, neutral, and good with splits 0, 4, 6, 10. It may reads weird if the users need to put 0, 4, 6, 10.1 (or 11). This also update the impl to use `Arrays.binarySearch` and `withClue` in test. 
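For illustration, a minimal sketch of the ratings example above under the new semantics, assuming a DataFrame `ratings` with a Double column "rating"; the setter names are the standard `ml.feature.Bucketizer` ones rather than code taken from this patch:

```scala
import org.apache.spark.ml.feature.Bucketizer

// Splits 0, 4, 6, 10 define exactly three buckets:
//   [0, 4)  -> 0.0 (bad)
//   [4, 6)  -> 1.0 (neutral)
//   [6, 10] -> 2.0 (good)   -- the right-most boundary 10 is now included
val bucketizer = new Bucketizer()
  .setInputCol("rating")
  .setOutputCol("ratingBucket")
  .setSplits(Array(0.0, 4.0, 6.0, 10.0))

val bucketed = bucketizer.transform(ratings)
```

A rating of exactly 10.0 falls into the last bucket instead of being rejected, while values outside [0, 10] still fail with a SparkException.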
yinxusen jkbradley Author: Xiangrui Meng <meng@databricks.com> Closes #6075 from mengxr/SPARK-7559 and squashes the following commits: e28f910 [Xiangrui Meng] update bucketizer impl --- .../apache/spark/ml/feature/Bucketizer.scala | 55 ++++++++++--------- .../spark/ml/feature/BucketizerSuite.scala | 25 +++++---- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index 7dba64bc3506f..b28c88aaaecbc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -17,6 +17,9 @@ package org.apache.spark.ml.feature +import java.{util => ju} + +import org.apache.spark.SparkException import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.param._ @@ -38,18 +41,19 @@ final class Bucketizer private[ml] (override val parent: Estimator[Bucketizer]) def this() = this(null) /** - * Parameter for mapping continuous features into buckets. With n splits, there are n+1 buckets. - * A bucket defined by splits x,y holds values in the range [x,y). Splits should be strictly - * increasing. Values at -inf, inf must be explicitly provided to cover all Double values; + * Parameter for mapping continuous features into buckets. With n+1 splits, there are n buckets. + * A bucket defined by splits x,y holds values in the range [x,y) except the last bucket, which + * also includes y. Splits should be strictly increasing. + * Values at -inf, inf must be explicitly provided to cover all Double values; * otherwise, values outside the splits specified will be treated as errors. * @group param */ val splits: Param[Array[Double]] = new Param[Array[Double]](this, "splits", - "Split points for mapping continuous features into buckets. With n splits, there are n+1 " + - "buckets. A bucket defined by splits x,y holds values in the range [x,y). The splits " + - "should be strictly increasing. Values at -inf, inf must be explicitly provided to cover" + - " all Double values; otherwise, values outside the splits specified will be treated as" + - " errors.", + "Split points for mapping continuous features into buckets. With n+1 splits, there are n " + + "buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last " + + "bucket, which also includes y. The splits should be strictly increasing. " + + "Values at -inf, inf must be explicitly provided to cover all Double values; " + + "otherwise, values outside the splits specified will be treated as errors.", Bucketizer.checkSplits) /** @group getParam */ @@ -104,28 +108,25 @@ private[feature] object Bucketizer { /** * Binary searching in several buckets to place each data point. - * @throws RuntimeException if a feature is < splits.head or >= splits.last + * @throws SparkException if a feature is < splits.head or > splits.last */ - def binarySearchForBuckets( - splits: Array[Double], - feature: Double): Double = { - // Check bounds. We make an exception for +inf so that it can exist in some bin. - if ((feature < splits.head) || (feature >= splits.last && feature != Double.PositiveInfinity)) { - throw new RuntimeException(s"Feature value $feature out of Bucketizer bounds" + - s" [${splits.head}, ${splits.last}). 
Check your features, or loosen " + - s"the lower/upper bound constraints.") - } - var left = 0 - var right = splits.length - 2 - while (left < right) { - val mid = (left + right) / 2 - val split = splits(mid + 1) - if (feature < split) { - right = mid + def binarySearchForBuckets(splits: Array[Double], feature: Double): Double = { + if (feature == splits.last) { + splits.length - 2 + } else { + val idx = ju.Arrays.binarySearch(splits, feature) + if (idx >= 0) { + idx } else { - left = mid + 1 + val insertPos = -idx - 1 + if (insertPos == 0 || insertPos == splits.length) { + throw new SparkException(s"Feature value $feature out of Bucketizer bounds" + + s" [${splits.head}, ${splits.last}]. Check your features, or loosen " + + s"the lower/upper bound constraints.") + } else { + insertPos - 1 + } } } - left } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala index acb46c0a35709..1900820400aee 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala @@ -57,16 +57,18 @@ class BucketizerSuite extends FunSuite with MLlibTestSparkContext { // Check for exceptions when using a set of invalid feature values. val invalidData1: Array[Double] = Array(-0.9) ++ validData - val invalidData2 = Array(0.5) ++ validData + val invalidData2 = Array(0.51) ++ validData val badDF1 = sqlContext.createDataFrame(invalidData1.zipWithIndex).toDF("feature", "idx") - intercept[RuntimeException]{ - bucketizer.transform(badDF1).collect() - println("Invalid feature value -0.9 was not caught as an invalid feature!") + withClue("Invalid feature value -0.9 was not caught as an invalid feature!") { + intercept[SparkException] { + bucketizer.transform(badDF1).collect() + } } val badDF2 = sqlContext.createDataFrame(invalidData2.zipWithIndex).toDF("feature", "idx") - intercept[RuntimeException]{ - bucketizer.transform(badDF2).collect() - println("Invalid feature value 0.5 was not caught as an invalid feature!") + withClue("Invalid feature value 0.51 was not caught as an invalid feature!") { + intercept[SparkException] { + bucketizer.transform(badDF2).collect() + } } } @@ -137,12 +139,11 @@ private object BucketizerSuite extends FunSuite { } var i = 0 while (i < splits.length - 1) { - testFeature(splits(i), i) // Split i should fall in bucket i. - testFeature((splits(i) + splits(i + 1)) / 2, i) // Value between splits i,i+1 should be in i. + // Split i should fall in bucket i. + testFeature(splits(i), i) + // Value between splits i,i+1 should be in i, which is also true if the (i+1)-th split is inf. + testFeature((splits(i) + splits(i + 1)) / 2, i) i += 1 } - if (splits.last === Double.PositiveInfinity) { - testFeature(Double.PositiveInfinity, splits.length - 2) - } } } From 455551d1c6cc206ffe1ff5ac52ca0ed89c61653d Mon Sep 17 00:00:00 2001 From: Venkata Ramana Gollamudi <ramana.gollamudi@huawei.com> Date: Tue, 12 May 2015 14:37:23 -0700 Subject: [PATCH 119/320] [SPARK-7484][SQL]Support jdbc connection properties Few jdbc drivers like SybaseIQ support passing username and password only through connection properties. So the same needs to be supported for SQLContext.jdbc, dataframe.createJDBCTable and dataframe.insertIntoJDBC. Added as default arguments or overrided function to support backward compatability. 
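A rough usage sketch of the new overloads (the JDBC URL and credentials are placeholders, and `df`/`sqlContext` are assumed to be in scope; only the trailing `properties` arguments are what this patch adds):

```scala
import java.util.Properties

val props = new Properties()
props.setProperty("user", "testUser")         // placeholder credentials
props.setProperty("password", "testPass")

val url = "jdbc:sybase:Tds:dbhost:5000/mydb"  // placeholder URL for a driver that only
                                              // accepts credentials via properties

// Write a DataFrame, passing the credentials through connection properties
df.createJDBCTable(url, "TEST.PEOPLE", false, props)
df.insertIntoJDBC(url, "TEST.PEOPLE", false, props)

// Read it back with the same properties
val people = sqlContext.jdbc(url, "TEST.PEOPLE", props)
```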
Author: Venkata Ramana Gollamudi <ramana.gollamudi@huawei.com> Closes #6009 from gvramana/add_jdbc_conn_properties and squashes the following commits: 396a0d0 [Venkata Ramana Gollamudi] fixed comments d66dd8c [Venkata Ramana Gollamudi] fixed comments 1b8cd8c [Venkata Ramana Gollamudi] Support jdbc connection properties --- .../org/apache/spark/sql/DataFrame.scala | 48 +++++++++-- .../org/apache/spark/sql/SQLContext.scala | 79 ++++++++++++++++--- .../org/apache/spark/sql/jdbc/jdbc.scala | 19 +++-- .../spark/sql/jdbc/JDBCWriteSuite.scala | 33 +++++--- 4 files changed, 148 insertions(+), 31 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 1f85dac682cbe..01fd432cc8190 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import java.io.CharArrayWriter import java.sql.DriverManager +import java.util.Properties import scala.collection.JavaConversions._ import scala.language.implicitConversions @@ -1582,7 +1583,24 @@ class DataFrame private[sql]( * @group output */ def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = { - val conn = DriverManager.getConnection(url) + createJDBCTable(url, table, allowExisting, new Properties()) + } + + /** + * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table` + * using connection properties defined in `properties`. + * This will run a `CREATE TABLE` and a bunch of `INSERT INTO` statements. + * If you pass `true` for `allowExisting`, it will drop any table with the + * given name; if you pass `false`, it will throw if the table already + * exists. + * @group output + */ + def createJDBCTable( + url: String, + table: String, + allowExisting: Boolean, + properties: Properties): Unit = { + val conn = DriverManager.getConnection(url, properties) try { if (allowExisting) { val sql = s"DROP TABLE IF EXISTS $table" @@ -1594,7 +1612,7 @@ class DataFrame private[sql]( } finally { conn.close() } - JDBCWriteDetails.saveTable(this, url, table) + JDBCWriteDetails.saveTable(this, url, table, properties) } /** @@ -1610,8 +1628,29 @@ class DataFrame private[sql]( * @group output */ def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = { + insertIntoJDBC(url, table, overwrite, new Properties()) + } + + /** + * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table` + * using connection properties defined in `properties`. + * Assumes the table already exists and has a compatible schema. If you + * pass `true` for `overwrite`, it will `TRUNCATE` the table before + * performing the `INSERT`s. + * + * The table must already exist on the database. It must have a schema + * that is compatible with the schema of this RDD; inserting the rows of + * the RDD in order via the simple statement + * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. 
+ * @group output + */ + def insertIntoJDBC( + url: String, + table: String, + overwrite: Boolean, + properties: Properties): Unit = { if (overwrite) { - val conn = DriverManager.getConnection(url) + val conn = DriverManager.getConnection(url, properties) try { val sql = s"TRUNCATE TABLE $table" conn.prepareStatement(sql).executeUpdate() @@ -1619,9 +1658,8 @@ class DataFrame private[sql]( conn.close() } } - JDBCWriteDetails.saveTable(this, url, table) + JDBCWriteDetails.saveTable(this, url, table, properties) } - //////////////////////////////////////////////////////////////////////////// // for Python API //////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index afee09adaae20..70ba8985d6342 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -949,9 +949,21 @@ class SQLContext(@transient val sparkContext: SparkContext) */ @Experimental def jdbc(url: String, table: String): DataFrame = { - jdbc(url, table, JDBCRelation.columnPartition(null)) + jdbc(url, table, JDBCRelation.columnPartition(null), new Properties()) } - + + /** + * :: Experimental :: + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table and connection properties. + * + * @group specificdata + */ + @Experimental + def jdbc(url: String, table: String, properties: Properties): DataFrame = { + jdbc(url, table, JDBCRelation.columnPartition(null), properties) + } + /** * :: Experimental :: * Construct a [[DataFrame]] representing the database table accessible via JDBC URL @@ -963,7 +975,31 @@ class SQLContext(@transient val sparkContext: SparkContext) * @param upperBound the maximum value of `columnName` used to decide partition stride * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split * evenly into this many partitions + * @group specificdata + */ + @Experimental + def jdbc( + url: String, + table: String, + columnName: String, + lowerBound: Long, + upperBound: Long, + numPartitions: Int): DataFrame = { + jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties()) + } + + /** + * :: Experimental :: + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table. Partitions of the table will be retrieved in parallel based on the parameters + * passed to this function. * + * @param columnName the name of a column of integral type that will be used for partitioning. + * @param lowerBound the minimum value of `columnName` used to decide partition stride + * @param upperBound the maximum value of `columnName` used to decide partition stride + * @param numPartitions the number of partitions. 
the range `minValue`-`maxValue` will be split + * evenly into this many partitions + * @param properties connection properties * @group specificdata */ @Experimental @@ -973,16 +1009,17 @@ class SQLContext(@transient val sparkContext: SparkContext) columnName: String, lowerBound: Long, upperBound: Long, - numPartitions: Int): DataFrame = { + numPartitions: Int, + properties: Properties): DataFrame = { val partitioning = JDBCPartitioningInfo(columnName, lowerBound, upperBound, numPartitions) val parts = JDBCRelation.columnPartition(partitioning) - jdbc(url, table, parts) + jdbc(url, table, parts, properties) } - + /** * :: Experimental :: * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table. The theParts parameter gives a list expressions + * url named table. The theParts parameter gives a list expressions * suitable for inclusion in WHERE clauses; each one defines one partition * of the [[DataFrame]]. * @@ -990,14 +1027,36 @@ class SQLContext(@transient val sparkContext: SparkContext) */ @Experimental def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { + jdbc(url, table, theParts, new Properties()) + } + + /** + * :: Experimental :: + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table using connection properties. The theParts parameter gives a list expressions + * suitable for inclusion in WHERE clauses; each one defines one partition + * of the [[DataFrame]]. + * + * @group specificdata + */ + @Experimental + def jdbc( + url: String, + table: String, + theParts: Array[String], + properties: Properties): DataFrame = { val parts: Array[Partition] = theParts.zipWithIndex.map { case (part, i) => JDBCPartition(part, i) : Partition } - jdbc(url, table, parts) + jdbc(url, table, parts, properties) } - - private def jdbc(url: String, table: String, parts: Array[Partition]): DataFrame = { - val relation = JDBCRelation(url, table, parts)(this) + + private def jdbc( + url: String, + table: String, + parts: Array[Partition], + properties: Properties): DataFrame = { + val relation = JDBCRelation(url, table, parts, properties)(this) baseRelationToDataFrame(relation) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala index 3a6c2c1e9101f..c099881a01226 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala @@ -57,9 +57,14 @@ package object jdbc { * non-Serializable. Instead, we explicitly close over all variables that * are used. */ - def savePartition(url: String, table: String, iterator: Iterator[Row], - rddSchema: StructType, nullTypes: Array[Int]): Iterator[Byte] = { - val conn = DriverManager.getConnection(url) + def savePartition( + url: String, + table: String, + iterator: Iterator[Row], + rddSchema: StructType, + nullTypes: Array[Int], + properties: Properties): Iterator[Byte] = { + val conn = DriverManager.getConnection(url, properties) var committed = false try { conn.setAutoCommit(false) // Everything in the same db transaction. @@ -152,7 +157,11 @@ package object jdbc { /** * Saves the RDD to the database in a single transaction. 
*/ - def saveTable(df: DataFrame, url: String, table: String) { + def saveTable( + df: DataFrame, + url: String, + table: String, + properties: Properties = new Properties()) { val quirks = DriverQuirks.get(url) var nullTypes: Array[Int] = df.schema.fields.map(field => { var nullType: Option[Int] = quirks.getJDBCType(field.dataType)._2 @@ -178,7 +187,7 @@ package object jdbc { val rddSchema = df.schema df.foreachPartition { iterator => - JDBCWriteDetails.savePartition(url, table, iterator, rddSchema, nullTypes) + JDBCWriteDetails.savePartition(url, table, iterator, rddSchema, nullTypes, properties) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index ee5c7620d1a22..f3ce8e66460e5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.jdbc import java.sql.DriverManager +import java.util.Properties import org.scalatest.{BeforeAndAfter, FunSuite} @@ -28,15 +29,25 @@ import org.apache.spark.sql.types._ class JDBCWriteSuite extends FunSuite with BeforeAndAfter { val url = "jdbc:h2:mem:testdb2" var conn: java.sql.Connection = null - + val url1 = "jdbc:h2:mem:testdb3" + var conn1: java.sql.Connection = null + val properties = new Properties() + properties.setProperty("user", "testUser") + properties.setProperty("password", "testPass") + properties.setProperty("rowId", "false") + before { Class.forName("org.h2.Driver") conn = DriverManager.getConnection(url) conn.prepareStatement("create schema test").executeUpdate() + + conn1 = DriverManager.getConnection(url1, properties) + conn1.prepareStatement("create schema test").executeUpdate() } after { conn.close() + conn1.close() } val sc = TestSQLContext.sparkContext @@ -65,13 +76,13 @@ class JDBCWriteSuite extends FunSuite with BeforeAndAfter { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x3), schema3) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr1x2), schema2) - df.createJDBCTable(url, "TEST.DROPTEST", false) - assert(2 == TestSQLContext.jdbc(url, "TEST.DROPTEST").count) - assert(3 == TestSQLContext.jdbc(url, "TEST.DROPTEST").collect()(0).length) + df.createJDBCTable(url1, "TEST.DROPTEST", false, properties) + assert(2 == TestSQLContext.jdbc(url1, "TEST.DROPTEST", properties).count) + assert(3 == TestSQLContext.jdbc(url1, "TEST.DROPTEST", properties).collect()(0).length) - df2.createJDBCTable(url, "TEST.DROPTEST", true) - assert(1 == TestSQLContext.jdbc(url, "TEST.DROPTEST").count) - assert(2 == TestSQLContext.jdbc(url, "TEST.DROPTEST").collect()(0).length) + df2.createJDBCTable(url1, "TEST.DROPTEST", true, properties) + assert(1 == TestSQLContext.jdbc(url1, "TEST.DROPTEST", properties).count) + assert(2 == TestSQLContext.jdbc(url1, "TEST.DROPTEST", properties).collect()(0).length) } test("CREATE then INSERT to append") { @@ -88,10 +99,10 @@ class JDBCWriteSuite extends FunSuite with BeforeAndAfter { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x2), schema2) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr1x2), schema2) - df.createJDBCTable(url, "TEST.TRUNCATETEST", false) - df2.insertIntoJDBC(url, "TEST.TRUNCATETEST", true) - assert(1 == TestSQLContext.jdbc(url, "TEST.TRUNCATETEST").count) - assert(2 == TestSQLContext.jdbc(url, "TEST.TRUNCATETEST").collect()(0).length) + df.createJDBCTable(url1, "TEST.TRUNCATETEST", false, properties) + 
df2.insertIntoJDBC(url1, "TEST.TRUNCATETEST", true, properties) + assert(1 == TestSQLContext.jdbc(url1, "TEST.TRUNCATETEST", properties).count) + assert(2 == TestSQLContext.jdbc(url1, "TEST.TRUNCATETEST", properties).collect()(0).length) } test("Incompatible INSERT to append") { From a4874b0d1820efd24071108434a4d89429473fe3 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Tue, 12 May 2015 14:39:03 -0700 Subject: [PATCH 120/320] [SPARK-7571] [MLLIB] rename Math to math `scala.Math` is deprecated since 2.8. This PR only touchs `Math` usages in MLlib. dbtsai Author: Xiangrui Meng <meng@databricks.com> Closes #6092 from mengxr/SPARK-7571 and squashes the following commits: fe8f8d3 [Xiangrui Meng] Math -> math --- .../spark/ml/classification/LogisticRegression.scala | 4 ++-- .../apache/spark/mllib/clustering/GaussianMixture.scala | 2 +- .../scala/org/apache/spark/mllib/optimization/NNLS.scala | 2 +- .../scala/org/apache/spark/mllib/stat/KernelDensity.scala | 4 ++-- .../spark/ml/classification/LogisticRegressionSuite.scala | 2 +- .../org/apache/spark/mllib/feature/NormalizerSuite.scala | 8 ++++---- .../spark/mllib/linalg/distributed/RowMatrixSuite.scala | 4 ++-- .../org/apache/spark/mllib/optimization/LBFGSSuite.scala | 2 +- .../spark/mllib/regression/IsotonicRegressionSuite.scala | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 647226a0d1a6e..93ba91167bfad 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -175,7 +175,7 @@ class LogisticRegression * }}} */ initialWeightsWithIntercept.toArray(numFeatures) - = Math.log(histogram(1).toDouble / histogram(0).toDouble) + = math.log(histogram(1).toDouble / histogram(0).toDouble) } val states = optimizer.iterations(new CachedDiffFunction(costFun), @@ -285,7 +285,7 @@ class LogisticRegressionModel private[ml] ( } else if (t == 1.0) { Double.PositiveInfinity } else { - Math.log(t / (1.0 - t)) + math.log(t / (1.0 - t)) } if (rawPrediction(1) > rawThreshold) 1 else 0 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala index 568b65305649f..c88410ac0ff43 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala @@ -160,7 +160,7 @@ class GaussianMixture private ( var llhp = 0.0 // previous log-likelihood var iter = 0 - while(iter < maxIterations && Math.abs(llh-llhp) > convergenceTol) { + while (iter < maxIterations && math.abs(llh-llhp) > convergenceTol) { // create and broadcast curried cluster contribution function val compute = sc.broadcast(ExpectationSum.add(weights, gaussians)_) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala index 4766f7708295d..64d52bae00907 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala @@ -91,7 +91,7 @@ private[spark] object NNLS { val dir = ws.dir val lastDir = ws.lastDir val res = ws.res - val iterMax = Math.max(400, 20 * n) + val iterMax = 
math.max(400, 20 * n) var lastNorm = 0.0 var iterno = 0 var lastWall = 0 // Last iteration when we hit a bound constraint. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala index 0deef11b4511a..79747cc5d7d74 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala @@ -32,7 +32,7 @@ private[stat] object KernelDensity { // This gets used in each Gaussian PDF computation, so compute it up front val logStandardDeviationPlusHalfLog2Pi = - Math.log(standardDeviation) + 0.5 * Math.log(2 * Math.PI) + math.log(standardDeviation) + 0.5 * math.log(2 * math.Pi) val (points, count) = samples.aggregate((new Array[Double](evaluationPoints.length), 0))( (x, y) => { @@ -66,6 +66,6 @@ private[stat] object KernelDensity { val x0 = x - mean val x1 = x0 / standardDeviation val logDensity = -0.5 * x1 * x1 - logStandardDeviationPlusHalfLog2Pi - Math.exp(logDensity) + math.exp(logDensity) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 78cdd471859b7..4df8016009171 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -489,7 +489,7 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { * b = \log{P(1) / P(0)} = \log{count_1 / count_0} * }}} */ - val interceptTheory = Math.log(histogram(1).toDouble / histogram(0).toDouble) + val interceptTheory = math.log(histogram(1).toDouble / histogram(0).toDouble) val weightsTheory = Array(0.0, 0.0, 0.0, 0.0) assert(model.intercept ~== interceptTheory relTol 1E-5) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala index 85fdd271b5ed1..5c4af2b99e68b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala @@ -106,10 +106,10 @@ class NormalizerSuite extends FunSuite with MLlibTestSparkContext { assert((dataInf, dataInfRDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) - assert(dataInf(0).toArray.map(Math.abs).max ~== 1.0 absTol 1E-5) - assert(dataInf(2).toArray.map(Math.abs).max ~== 1.0 absTol 1E-5) - assert(dataInf(3).toArray.map(Math.abs).max ~== 1.0 absTol 1E-5) - assert(dataInf(4).toArray.map(Math.abs).max ~== 1.0 absTol 1E-5) + assert(dataInf(0).toArray.map(math.abs).max ~== 1.0 absTol 1E-5) + assert(dataInf(2).toArray.map(math.abs).max ~== 1.0 absTol 1E-5) + assert(dataInf(3).toArray.map(math.abs).max ~== 1.0 absTol 1E-5) + assert(dataInf(4).toArray.map(math.abs).max ~== 1.0 absTol 1E-5) assert(dataInf(0) ~== Vectors.sparse(3, Seq((0, -0.86956522), (1, 1.0))) absTol 1E-5) assert(dataInf(1) ~== Vectors.dense(0.0, 0.0, 0.0) absTol 1E-5) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index 3309713e91f87..27bb19f472e1e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ 
-96,7 +96,7 @@ class RowMatrixSuite extends FunSuite with MLlibTestSparkContext { } test("similar columns") { - val colMags = Vectors.dense(Math.sqrt(126), Math.sqrt(66), Math.sqrt(94)) + val colMags = Vectors.dense(math.sqrt(126), math.sqrt(66), math.sqrt(94)) val expected = BDM( (0.0, 54.0, 72.0), (0.0, 0.0, 78.0), @@ -232,7 +232,7 @@ class RowMatrixSuite extends FunSuite with MLlibTestSparkContext { assert(summary.numNonzeros === Vectors.dense(3.0, 3.0, 4.0), "nnz mismatch") assert(summary.max === Vectors.dense(9.0, 7.0, 8.0), "max mismatch") assert(summary.min === Vectors.dense(0.0, 0.0, 1.0), "column mismatch.") - assert(summary.normL2 === Vectors.dense(Math.sqrt(126), Math.sqrt(66), Math.sqrt(94)), + assert(summary.normL2 === Vectors.dense(math.sqrt(126), math.sqrt(66), math.sqrt(94)), "magnitude mismatch.") assert(summary.normL1 === Vectors.dense(18.0, 12.0, 16.0), "L1 norm mismatch") } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala index 70c64775e4c04..c8f2adcf155a7 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala @@ -89,7 +89,7 @@ class LBFGSSuite extends FunSuite with MLlibTestSparkContext with Matchers { // it requires 90 iterations in GD. No matter how hard we increase // the number of iterations in GD here, the lossGD will be always // larger than lossLBFGS. This is based on observation, no theoretically guaranteed - assert(Math.abs((lossGD.last - loss.last) / loss.last) < 0.02, + assert(math.abs((lossGD.last - loss.last) / loss.last) < 0.02, "LBFGS should match GD result within 2% difference.") } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala index 8e12340bbd9d6..3b38bdf5ef5eb 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.Utils class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with Matchers { private def round(d: Double) = { - Math.round(d * 100).toDouble / 100 + math.round(d * 100).toDouble / 100 } private def generateIsotonicInput(labels: Seq[Double]): Seq[(Double, Double, Double)] = { From 1422e79e517ca14a6b0e178f015362d2e0d413c6 Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Tue, 12 May 2015 14:41:21 -0700 Subject: [PATCH 121/320] [SPARK-7406] [STREAMING] [WEBUI] Add tooltips for "Scheduling Delay", "Processing Time" and "Total Delay" Screenshots: ![screen shot 2015-05-06 at 2 29 03 pm](https://cloud.githubusercontent.com/assets/1000778/7504129/9c57f710-f3fc-11e4-9c6e-1b79c17c546d.png) ![screen shot 2015-05-06 at 2 24 35 pm](https://cloud.githubusercontent.com/assets/1000778/7504140/b63bb216-f3fc-11e4-83a5-6dfc6481d192.png) tdas as we discussed offline Author: zsxwing <zsxwing@gmail.com> Closes #5952 from zsxwing/SPARK-7406 and squashes the following commits: 2b004ea [zsxwing] Merge branch 'master' into SPARK-7406 e9eb506 [zsxwing] Update tooltip contents 2215b2a [zsxwing] Add tooltips for "Scheduling Delay", "Processing Time" and "Total Delay" --- .../src/main/scala/org/apache/spark/ui/UIUtils.scala | 12 +++++++----- .../apache/spark/streaming/ui/AllBatchesTable.scala | 11 
++++++++--- .../apache/spark/streaming/ui/StreamingPage.scala | 6 +++--- .../org/apache/spark/streaming/UISeleniumSuite.scala | 6 +++--- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 441c97d6d3e2a..ad16becde85dd 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -194,11 +194,7 @@ private[spark] object UIUtils extends Logging { <a href={prependBaseUri(activeTab.basePath, "/" + tab.prefix + "/")}>{tab.name}</a> </li> } - val helpButton: Seq[Node] = helpText.map { helpText => - <sup> - (<a data-toggle="tooltip" data-placement="bottom" title={helpText}>?</a>) - </sup> - }.getOrElse(Seq.empty) + val helpButton: Seq[Node] = helpText.map(tooltip(_, "bottom")).getOrElse(Seq.empty) <html> <head> @@ -375,6 +371,12 @@ private[spark] object UIUtils extends Logging { </div> } + def tooltip(text: String, position: String): Seq[Node] = { + <sup> + (<a data-toggle="tooltip" data-placement={position} title={text}>?</a>) + </sup> + } + /** Return a script element that automatically expands the DAG visualization on page load. */ def expandDagVizOnLoad(forJob: Boolean): Seq[Node] = { <script type="text/javascript"> diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala index 2960b528d4c5e..3619e129ad9cf 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala @@ -26,8 +26,11 @@ private[ui] abstract class BatchTableBase(tableId: String) { protected def columns: Seq[Node] = { <th>Batch Time</th> <th>Input Size</th> - <th>Scheduling Delay</th> - <th>Processing Time</th> + <th>Scheduling Delay + {SparkUIUtils.tooltip("Time taken by Streaming scheduler to submit jobs of a batch", "top")} + </th> + <th>Processing Time + {SparkUIUtils.tooltip("Time taken to process all jobs of a batch", "top")}</th> } protected def baseRow(batch: BatchUIData): Seq[Node] = { @@ -99,7 +102,9 @@ private[ui] class ActiveBatchTable( private[ui] class CompletedBatchTable(batches: Seq[BatchUIData]) extends BatchTableBase("completed-batches-table") { - override protected def columns: Seq[Node] = super.columns ++ <th>Total Delay</th> + override protected def columns: Seq[Node] = super.columns ++ + <th>Total Delay + {SparkUIUtils.tooltip("Total time taken to handle a batch", "top")}</th> override protected def renderRows: Seq[Node] = { batches.flatMap(batch => <tr>{completedBatchRow(batch)}</tr>) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala index ecbebe5c6c97b..ff0f2b18dc321 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala @@ -347,7 +347,7 @@ private[ui] class StreamingPage(parent: StreamingTab) <tr> <td style="vertical-align: middle;"> <div style="width: 160px;"> - <div><strong>Scheduling Delay</strong></div> + <div><strong>Scheduling Delay {SparkUIUtils.tooltip("Time taken by Streaming scheduler to submit jobs of a batch", "right")}</strong></div> <div>Avg: {schedulingDelay.formattedAvg}</div> </div> </td> @@ -357,7 +357,7 @@ private[ui] class StreamingPage(parent: 
StreamingTab) <tr> <td style="vertical-align: middle;"> <div style="width: 160px;"> - <div><strong>Processing Time</strong></div> + <div><strong>Processing Time {SparkUIUtils.tooltip("Time taken to process all jobs of a batch", "right")}</strong></div> <div>Avg: {processingTime.formattedAvg}</div> </div> </td> @@ -367,7 +367,7 @@ private[ui] class StreamingPage(parent: StreamingTab) <tr> <td style="vertical-align: middle;"> <div style="width: 160px;"> - <div><strong>Total Delay</strong></div> + <div><strong>Total Delay {SparkUIUtils.tooltip("Total time taken to handle a batch", "right")}</strong></div> <div>Avg: {totalDelay.formattedAvg}</div> </div> </td> diff --git a/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala index 2211f62383ce8..441bbf95d0153 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala @@ -116,12 +116,12 @@ class UISeleniumSuite h4Text.exists(_.matches("Completed Batches \\(last \\d+ out of \\d+\\)")) should be (true) findAll(cssSelector("""#active-batches-table th""")).map(_.text).toSeq should be { - List("Batch Time", "Input Size", "Scheduling Delay", "Processing Time", + List("Batch Time", "Input Size", "Scheduling Delay (?)", "Processing Time (?)", "Status") } findAll(cssSelector("""#completed-batches-table th""")).map(_.text).toSeq should be { - List("Batch Time", "Input Size", "Scheduling Delay", "Processing Time", - "Total Delay") + List("Batch Time", "Input Size", "Scheduling Delay (?)", "Processing Time (?)", + "Total Delay (?)") } val batchLinks = From 1d703660d4d14caea697affdf31170aea44c8903 Mon Sep 17 00:00:00 2001 From: Yuhao Yang <hhbyyh@gmail.com> Date: Tue, 12 May 2015 15:12:29 -0700 Subject: [PATCH 122/320] [SPARK-7496] [MLLIB] Update Programming guide with Online LDA jira: https://issues.apache.org/jira/browse/SPARK-7496 Update LDA subsection of clustering section of MLlib programming guide to include OnlineLDA. Author: Yuhao Yang <hhbyyh@gmail.com> Closes #6046 from hhbyyh/ldaDocument and squashes the following commits: 4b6fbfa [Yuhao Yang] add online paper and some comparison fd4c983 [Yuhao Yang] update lda document for optimizers --- docs/mllib-clustering.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md index f5aa15b7d9b79..f41ca70952eb7 100644 --- a/docs/mllib-clustering.md +++ b/docs/mllib-clustering.md @@ -377,11 +377,11 @@ LDA can be thought of as a clustering algorithm as follows: on a statistical model of how text documents are generated. LDA takes in a collection of documents as vectors of word counts. -It learns clustering using [expectation-maximization](http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) -on the likelihood function. After fitting on the documents, LDA provides: +It supports different inference algorithms via `setOptimizer` function. EMLDAOptimizer learns clustering using [expectation-maximization](http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) +on the likelihood function and yields comprehensive results, while OnlineLDAOptimizer uses iterative mini-batch sampling for [online variational inference](https://www.cs.princeton.edu/~blei/papers/HoffmanBleiBach2010b.pdf) and is generally memory friendly. 
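For reference, a minimal Scala sketch of switching between the two optimizers through the MLlib API (the SparkContext `sc` and the tiny term-count corpus below are illustrative, not part of this patch):
```
import org.apache.spark.mllib.clustering.{LDA, OnlineLDAOptimizer}
import org.apache.spark.mllib.linalg.Vectors

// Corpus of (document id, term-count vector) pairs.
val corpus = sc.parallelize(Seq(
  (0L, Vectors.dense(1.0, 2.0, 0.0, 5.0)),
  (1L, Vectors.dense(0.0, 1.0, 3.0, 2.0)),
  (2L, Vectors.dense(4.0, 0.0, 1.0, 0.0))))

// EM is the default; pass an OnlineLDAOptimizer for online variational inference.
val model = new LDA()
  .setK(2)
  .setOptimizer(new OnlineLDAOptimizer())
  .run(corpus)

// vocabSize x k matrix of per-topic term weights.
val topics = model.topicsMatrix
```
The EM path produces a DistributedLDAModel while the online path produces a LocalLDAModel, which is why the per-document topic distributions mentioned below are marked EM-only.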
After fitting on the documents, LDA provides: * Topics: Inferred topics, each of which is a probability distribution over terms (words). -* Topic distributions for documents: For each document in the training set, LDA gives a probability distribution over topics. +* Topic distributions for documents: For each document in the training set, LDA gives a probability distribution over topics. (EM only) LDA takes the following parameters: From f0c1bc3472a7422ae5649634f29c88e161f5ecaf Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" <joseph@databricks.com> Date: Tue, 12 May 2015 16:39:56 -0700 Subject: [PATCH 123/320] [SPARK-7557] [ML] [DOC] User guide for spark.ml HashingTF, Tokenizer Added feature transformer subsection to spark.ml guide, with HashingTF and Tokenizer. Added JavaHashingTFSuite to test Java examples in new guide. I've run Scala, Python examples in the Spark/PySpark shells. I ran the Java examples via the test suite (with small modifications for printing). CC: mengxr Author: Joseph K. Bradley <joseph@databricks.com> Closes #6093 from jkbradley/hashingtf-guide and squashes the following commits: d5d213f [Joseph K. Bradley] small fix dd6e91a [Joseph K. Bradley] fixes from code review of user guide 33c3ff9 [Joseph K. Bradley] small fix bc6058c [Joseph K. Bradley] fix link 361a174 [Joseph K. Bradley] Added subsection for feature transformers to spark.ml guide, with HashingTF and Tokenizer. Added JavaHashingTFSuite to test Java examples in new guide --- docs/ml-features.md | 188 ++++++++++++++++++ docs/ml-guide.md | 9 + .../spark/ml/feature/JavaHashingTFSuite.java | 81 ++++++++ 3 files changed, 278 insertions(+) create mode 100644 docs/ml-features.md create mode 100644 mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java diff --git a/docs/ml-features.md b/docs/ml-features.md new file mode 100644 index 0000000000000..0cbebcb739b14 --- /dev/null +++ b/docs/ml-features.md @@ -0,0 +1,188 @@ +--- +layout: global +title: Feature Extraction, Transformation, and Selection - SparkML +displayTitle: <a href="ml-guide.html">ML</a> - Features +--- + +This section covers algorithms for working with features, roughly divided into these groups: + +* Extraction: Extracting features from "raw" data +* Transformation: Scaling, converting, or modifying features +* Selection: Selecting a subset from a larger set of features + +**Table of Contents** + +* This will become a table of contents (this text will be scraped). +{:toc} + + +# Feature Extractors + +## Hashing Term-Frequency (HashingTF) + +`HashingTF` is a `Transformer` which takes sets of terms (e.g., `String` terms can be sets of words) and converts those sets into fixed-length feature vectors. +The algorithm combines [Term Frequency (TF)](http://en.wikipedia.org/wiki/Tf%E2%80%93idf) counts with the [hashing trick](http://en.wikipedia.org/wiki/Feature_hashing) for dimensionality reduction. Please refer to the [MLlib user guide on TF-IDF](mllib-feature-extraction.html#tf-idf) for more details on Term-Frequency. + +HashingTF is implemented in +[HashingTF](api/scala/index.html#org.apache.spark.ml.feature.HashingTF). +In the following code segment, we start with a set of sentences. We split each sentence into words using `Tokenizer`. For each sentence (bag of words), we hash it into a feature vector. This feature vector could then be passed to a learning algorithm. 
+ +<div class="codetabs"> +<div data-lang="scala" markdown="1"> +{% highlight scala %} +import org.apache.spark.ml.feature.{HashingTF, Tokenizer} + +val sentenceDataFrame = sqlContext.createDataFrame(Seq( + (0, "Hi I heard about Spark"), + (0, "I wish Java could use case classes"), + (1, "Logistic regression models are neat") +)).toDF("label", "sentence") +val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words") +val wordsDataFrame = tokenizer.transform(sentenceDataFrame) +val hashingTF = new HashingTF().setInputCol("words").setOutputCol("features").setNumFeatures(20) +val featurized = hashingTF.transform(wordsDataFrame) +featurized.select("features", "label").take(3).foreach(println) +{% endhighlight %} +</div> + +<div data-lang="java" markdown="1"> +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.HashingTF; +import org.apache.spark.ml.feature.Tokenizer; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(0, "Hi I heard about Spark"), + RowFactory.create(0, "I wish Java could use case classes"), + RowFactory.create(1, "Logistic regression models are neat") +)); +StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("sentence", DataTypes.StringType, false, Metadata.empty()) +}); +DataFrame sentenceDataFrame = sqlContext.createDataFrame(jrdd, schema); +Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words"); +DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame); +int numFeatures = 20; +HashingTF hashingTF = new HashingTF() + .setInputCol("words") + .setOutputCol("features") + .setNumFeatures(numFeatures); +DataFrame featurized = hashingTF.transform(wordsDataFrame); +for (Row r : featurized.select("features", "label").take(3)) { + Vector features = r.getAs(0); + Double label = r.getDouble(1); + System.out.println(features); +} +{% endhighlight %} +</div> + +<div data-lang="python" markdown="1"> +{% highlight python %} +from pyspark.ml.feature import HashingTF, Tokenizer + +sentenceDataFrame = sqlContext.createDataFrame([ + (0, "Hi I heard about Spark"), + (0, "I wish Java could use case classes"), + (1, "Logistic regression models are neat") +], ["label", "sentence"]) +tokenizer = Tokenizer(inputCol="sentence", outputCol="words") +wordsDataFrame = tokenizer.transform(sentenceDataFrame) +hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20) +featurized = hashingTF.transform(wordsDataFrame) +for features_label in featurized.select("features", "label").take(3): + print features_label +{% endhighlight %} +</div> +</div> + + +# Feature Transformers + +## Tokenizer + +[Tokenization](http://en.wikipedia.org/wiki/Lexical_analysis#Tokenization) is the process of taking text (such as a sentence) and breaking it into individual terms (usually words). A simple [Tokenizer](api/scala/index.html#org.apache.spark.ml.feature.Tokenizer) class provides this functionality. The example below shows how to split sentences into sequences of words. 
+ +Note: A more advanced tokenizer is provided via [RegexTokenizer](api/scala/index.html#org.apache.spark.ml.feature.RegexTokenizer). + +<div class="codetabs"> +<div data-lang="scala" markdown="1"> +{% highlight scala %} +import org.apache.spark.ml.feature.Tokenizer + +val sentenceDataFrame = sqlContext.createDataFrame(Seq( + (0, "Hi I heard about Spark"), + (0, "I wish Java could use case classes"), + (1, "Logistic regression models are neat") +)).toDF("label", "sentence") +val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words") +val wordsDataFrame = tokenizer.transform(sentenceDataFrame) +wordsDataFrame.select("words", "label").take(3).foreach(println) +{% endhighlight %} +</div> + +<div data-lang="java" markdown="1"> +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.Tokenizer; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(0, "Hi I heard about Spark"), + RowFactory.create(0, "I wish Java could use case classes"), + RowFactory.create(1, "Logistic regression models are neat") +)); +StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("sentence", DataTypes.StringType, false, Metadata.empty()) +}); +DataFrame sentenceDataFrame = sqlContext.createDataFrame(jrdd, schema); +Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words"); +DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame); +for (Row r : wordsDataFrame.select("words", "label").take(3)) { + java.util.List<String> words = r.getList(0); + for (String word : words) System.out.print(word + " "); + System.out.println(); +} +{% endhighlight %} +</div> + +<div data-lang="python" markdown="1"> +{% highlight python %} +from pyspark.ml.feature import Tokenizer + +sentenceDataFrame = sqlContext.createDataFrame([ + (0, "Hi I heard about Spark"), + (0, "I wish Java could use case classes"), + (1, "Logistic regression models are neat") +], ["label", "sentence"]) +tokenizer = Tokenizer(inputCol="sentence", outputCol="words") +wordsDataFrame = tokenizer.transform(sentenceDataFrame) +for words_label in wordsDataFrame.select("words", "label").take(3): + print words_label +{% endhighlight %} +</div> +</div> + + +# Feature Selectors + diff --git a/docs/ml-guide.md b/docs/ml-guide.md index 771a07183e26f..b7b6376e061f7 100644 --- a/docs/ml-guide.md +++ b/docs/ml-guide.md @@ -148,6 +148,15 @@ Parameters belong to specific instances of `Estimator`s and `Transformer`s. For example, if we have two `LogisticRegression` instances `lr1` and `lr2`, then we can build a `ParamMap` with both `maxIter` parameters specified: `ParamMap(lr1.maxIter -> 10, lr2.maxIter -> 20)`. This is useful if there are two algorithms with the `maxIter` parameter in a `Pipeline`. +# Algorithm Guides + +There are now several algorithms in the Pipelines API which are not in the lower-level MLlib API, so we link to documentation for them here. These algorithms are mostly feature transformers, which fit naturally into the `Transformer` abstraction in Pipelines. 
+ +**Pipelines API Algorithm Guides** + +* [Feature Extraction, Transformation, and Selection](ml-features.html) + + # Code Examples This section gives code examples illustrating the functionality discussed above. diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java new file mode 100644 index 0000000000000..23463ab5fe848 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + + +public class JavaHashingTFSuite { + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaHashingTFSuite"); + jsql = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void hashingTF() { + JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(0, "Hi I heard about Spark"), + RowFactory.create(0, "I wish Java could use case classes"), + RowFactory.create(1, "Logistic regression models are neat") + )); + StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("sentence", DataTypes.StringType, false, Metadata.empty()) + }); + DataFrame sentenceDataFrame = jsql.createDataFrame(jrdd, schema); + + Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words"); + DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame); + int numFeatures = 20; + HashingTF hashingTF = new HashingTF() + .setInputCol("words") + .setOutputCol("features") + .setNumFeatures(numFeatures); + DataFrame featurized = hashingTF.transform(wordsDataFrame); + for (Row r : featurized.select("features", "words", "label").take(3)) { + Vector features = r.getAs(0); + Assert.assertEquals(features.size(), numFeatures); + } + } +} From 96c4846db89802f5a81dca5dcfa3f2a0f72b5cb8 Mon Sep 17 00:00:00 2001 From: "Joseph K. 
Bradley" <joseph@databricks.com> Date: Tue, 12 May 2015 16:42:30 -0700 Subject: [PATCH 124/320] [SPARK-7573] [ML] OneVsRest cleanups Minor cleanups discussed with [~mengxr]: * move OneVsRest from reduction to classification sub-package * make model constructor private Some doc cleanups too CC: harsha2010 Could you please verify this looks OK? Thanks! Author: Joseph K. Bradley <joseph@databricks.com> Closes #6097 from jkbradley/onevsrest-cleanup and squashes the following commits: 4ecd48d [Joseph K. Bradley] org imports 430b065 [Joseph K. Bradley] moved OneVsRest from reduction subpackage to classification. small java doc style fixes 9f8b9b9 [Joseph K. Bradley] Small cleanups to OneVsRest. Made model constructor private to ml package. --- .../OneVsRest.scala | 32 +++++++++---------- .../JavaOneVsRestSuite.java | 13 +++----- .../OneVsRestSuite.scala | 9 ++---- 3 files changed, 23 insertions(+), 31 deletions(-) rename mllib/src/main/scala/org/apache/spark/ml/{reduction => classification}/OneVsRest.scala (90%) rename mllib/src/test/java/org/apache/spark/ml/{reduction => classification}/JavaOneVsRestSuite.java (90%) rename mllib/src/test/scala/org/apache/spark/ml/{reduction => classification}/OneVsRestSuite.scala (92%) diff --git a/mllib/src/main/scala/org/apache/spark/ml/reduction/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala similarity index 90% rename from mllib/src/main/scala/org/apache/spark/ml/reduction/OneVsRest.scala rename to mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index 0a6728ef1f779..afb8d75d57384 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/reduction/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.ml.reduction +package org.apache.spark.ml.classification import java.util.UUID @@ -24,7 +24,6 @@ import scala.language.existentials import org.apache.spark.annotation.{AlphaComponent, Experimental} import org.apache.spark.ml._ import org.apache.spark.ml.attribute._ -import org.apache.spark.ml.classification.{ClassificationModel, Classifier} import org.apache.spark.ml.param.Param import org.apache.spark.ml.util.MetadataUtils import org.apache.spark.mllib.linalg.Vector @@ -57,20 +56,21 @@ private[ml] trait OneVsRestParams extends PredictorParams { } /** + * :: AlphaComponent :: + * * Model produced by [[OneVsRest]]. - * Stores the models resulting from training k different classifiers: - * one for each class. - * Each example is scored against all k models and the model with highest score + * This stores the models resulting from training k binary classifiers: one for each class. + * Each example is scored against all k models, and the model with the highest score * is picked to label the example. - * TODO: API may need to change when we introduce a ClassificationModel trait as the public API - * @param parent + * * @param labelMetadata Metadata of label column if it exists, or Nominal attribute * representing the number of classes in training dataset otherwise. - * @param models the binary classification models for reduction. - * The i-th model is produced by testing the i-th class vs the rest. + * @param models The binary classification models for the reduction. + * The i-th model is produced by testing the i-th class (taking label 1) vs the rest + * (taking label 0). 
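As a rough usage sketch (not part of this patch), the reduction is driven from the Pipelines API roughly as follows; `training` and `test` stand in for DataFrames with "label" and "features" columns:
```
import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest}

// Train one binary LogisticRegression model per class.
val ovr = new OneVsRest()
  .setClassifier(new LogisticRegression().setMaxIter(10))
val ovrModel = ovr.fit(training)

// Each test row is scored against all k models; the argmax becomes the "prediction" column.
val predictions = ovrModel.transform(test)
```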
*/ @AlphaComponent -class OneVsRestModel( +class OneVsRestModel private[ml] ( override val parent: OneVsRest, labelMetadata: Metadata, val models: Array[_ <: ClassificationModel[_,_]]) @@ -90,7 +90,7 @@ class OneVsRestModel( // add an accumulator column to store predictions of all the models val accColName = "mbc$acc" + UUID.randomUUID().toString val init: () => Map[Int, Double] = () => {Map()} - val mapType = MapType(IntegerType, DoubleType, false) + val mapType = MapType(IntegerType, DoubleType, valueContainsNull = false) val newDataset = dataset.withColumn(accColName, callUDF(init, mapType)) // persist if underlying dataset is not persistent. @@ -101,7 +101,7 @@ class OneVsRestModel( // update the accumulator column with the result of prediction of models val aggregatedDataset = models.zipWithIndex.foldLeft[DataFrame](newDataset) { - case (df, (model, index)) => { + case (df, (model, index)) => val rawPredictionCol = model.getRawPredictionCol val columns = origCols ++ List(col(rawPredictionCol), col(accColName)) @@ -110,7 +110,7 @@ class OneVsRestModel( val update: (Map[Int, Double], Vector) => Map[Int, Double] = (predictions: Map[Int, Double], prediction: Vector) => { predictions + ((index, prediction(1))) - } + } val updateUdf = callUDF(update, mapType, col(accColName), col(rawPredictionCol)) val transformedDataset = model.transform(df).select(columns:_*) val updatedDataset = transformedDataset.withColumn(tmpColName, updateUdf) @@ -118,7 +118,6 @@ class OneVsRestModel( // switch out the intermediate column with the accumulator column updatedDataset.select(newColumns:_*).withColumnRenamed(tmpColName, accColName) - } } if (handlePersistence) { @@ -149,8 +148,8 @@ class OneVsRestModel( final class OneVsRest extends Estimator[OneVsRestModel] with OneVsRestParams { /** @group setParam */ - // TODO: Find a better way to do this. Existential Types don't work with Java API so cast needed. def setClassifier(value: Classifier[_,_,_]): this.type = { + // TODO: Find a better way to do this. Existential Types don't work with Java API so cast needed set(classifier, value.asInstanceOf[ClassifierType]) } @@ -201,9 +200,8 @@ final class OneVsRest extends Estimator[OneVsRestModel] with OneVsRestParams { // extract label metadata from label column if present, or create a nominal attribute // to output the number of labels val labelAttribute = Attribute.fromStructField(labelSchema) match { - case _: NumericAttribute | UnresolvedAttribute => { + case _: NumericAttribute | UnresolvedAttribute => NominalAttribute.defaultAttr.withName("label").withNumValues(numClasses) - } case attr: Attribute => attr } copyValues(new OneVsRestModel(this, labelAttribute.toMetadata(), models)) diff --git a/mllib/src/test/java/org/apache/spark/ml/reduction/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java similarity index 90% rename from mllib/src/test/java/org/apache/spark/ml/reduction/JavaOneVsRestSuite.java rename to mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java index 40a90ae9ded60..a1ee554152372 100644 --- a/mllib/src/test/java/org/apache/spark/ml/reduction/JavaOneVsRestSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java @@ -15,21 +15,20 @@ * limitations under the License. 
*/ -package org.apache.spark.ml.reduction; +package org.apache.spark.ml.classification; import java.io.Serializable; import java.util.List; +import static scala.collection.JavaConversions.seqAsJavaList; + import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import static scala.collection.JavaConversions.seqAsJavaList; - import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.ml.classification.LogisticRegression; import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateMultinomialLogisticInput; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.DataFrame; @@ -48,10 +47,8 @@ public void setUp() { jsql = new SQLContext(jsc); int nPoints = 3; - /** - * The following weights and xMean/xVariance are computed from iris dataset with lambda = 0.2. - * As a result, we are actually drawing samples from probability distribution of built model. - */ + // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2. + // As a result, we are drawing samples from probability distribution of an actual model. double[] weights = { -0.57997, 0.912083, -0.371077, -0.819866, 2.688191, -0.16624, -0.84355, -0.048509, -0.301789, 4.170682 }; diff --git a/mllib/src/test/scala/org/apache/spark/ml/reduction/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala similarity index 92% rename from mllib/src/test/scala/org/apache/spark/ml/reduction/OneVsRestSuite.scala rename to mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala index ebec7c68e8144..e65ffae918ca9 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/reduction/OneVsRestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala @@ -15,12 +15,11 @@ * limitations under the License. */ -package org.apache.spark.ml.reduction +package org.apache.spark.ml.classification import org.scalatest.FunSuite import org.apache.spark.ml.attribute.NominalAttribute -import org.apache.spark.ml.classification.{LogisticRegressionModel, LogisticRegression} import org.apache.spark.ml.util.MetadataUtils import org.apache.spark.mllib.classification.LogisticRegressionSuite._ import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS @@ -42,10 +41,8 @@ class OneVsRestSuite extends FunSuite with MLlibTestSparkContext { sqlContext = new SQLContext(sc) val nPoints = 1000 - /** - * The following weights and xMean/xVariance are computed from iris dataset with lambda = 0.2. - * As a result, we are actually drawing samples from probability distribution of built model. - */ + // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2. + // As a result, we are drawing samples from probability distribution of an actual model. val weights = Array( -0.57997, 0.912083, -0.371077, -0.819866, 2.688191, -0.16624, -0.84355, -0.048509, -0.301789, 4.170682) From 00e7b09a0bee2fcfd0ce34992bd26435758daf26 Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Tue, 12 May 2015 16:44:14 -0700 Subject: [PATCH 125/320] [SPARK-7553] [STREAMING] Added methods to maintain a singleton StreamingContext In a REPL/notebook environment, its very easy to lose a reference to a StreamingContext by overriding the variable name. So if you happen to execute the following commands ``` val ssc = new StreamingContext(...) // cmd 1 ssc.start() // cmd 2 ... 
val ssc = new StreamingContext(...) // accidentally run cmd 1 again ``` The value of ssc will be overwritten. Now you can neither start the new context (as only one context can be started), nor stop the previous context (as the reference is lost). Hence its best to maintain a singleton reference to the active context, so that we never loose reference for the active context. Since this problem occurs useful in REPL environments, its best to add this as an Experimental support in the Scala API only so that it can be used in Scala REPLs and notebooks. Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6070 from tdas/SPARK-7553 and squashes the following commits: 731c9a1 [Tathagata Das] Fixed style a797171 [Tathagata Das] Added more unit tests 19fc70b [Tathagata Das] Added :: Experimental :: in docs 64706c9 [Tathagata Das] Fixed test 634db5d [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7553 3884a25 [Tathagata Das] Fixing test bug d37a846 [Tathagata Das] Added getActive and getActiveOrCreate --- .../spark/streaming/StreamingContext.scala | 61 ++++++- .../streaming/StreamingContextSuite.scala | 152 +++++++++++++++++- 2 files changed, 202 insertions(+), 11 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 8461e901208ff..407cab45ed4c6 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -637,8 +637,10 @@ class StreamingContext private[streaming] ( */ object StreamingContext extends Logging { + /** - * Lock that guards access to global variables that track active StreamingContext. + * Lock that guards activation of a StreamingContext as well as access to the singleton active + * StreamingContext in getActiveOrCreate(). */ private val ACTIVATION_LOCK = new Object() @@ -661,6 +663,18 @@ object StreamingContext extends Logging { } } + /** + * :: Experimental :: + * + * Get the currently active context, if there is one. Active means started but not stopped. + */ + @Experimental + def getActive(): Option[StreamingContext] = { + ACTIVATION_LOCK.synchronized { + Option(activeContext.get()) + } + } + @deprecated("Replaced by implicit functions in the DStream companion object. This is " + "kept here only for backward compatibility.", "1.3.0") def toPairDStreamFunctions[K, V](stream: DStream[(K, V)]) @@ -669,6 +683,48 @@ object StreamingContext extends Logging { DStream.toPairDStreamFunctions(stream)(kt, vt, ord) } + /** + * :: Experimental :: + * + * Either return the "active" StreamingContext (that is, started but not stopped), or create a + * new StreamingContext that is + * @param creatingFunc Function to create a new StreamingContext + */ + @Experimental + def getActiveOrCreate(creatingFunc: () => StreamingContext): StreamingContext = { + ACTIVATION_LOCK.synchronized { + getActive().getOrElse { creatingFunc() } + } + } + + /** + * :: Experimental :: + * + * Either get the currently active StreamingContext (that is, started but not stopped), + * OR recreate a StreamingContext from checkpoint data in the given path. If checkpoint data + * does not exist in the provided, then create a new StreamingContext by calling the provided + * `creatingFunc`. 
+ * + * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program + * @param creatingFunc Function to create a new StreamingContext + * @param hadoopConf Optional Hadoop configuration if necessary for reading from the + * file system + * @param createOnError Optional, whether to create a new StreamingContext if there is an + * error in reading checkpoint data. By default, an exception will be + * thrown on error. + */ + @Experimental + def getActiveOrCreate( + checkpointPath: String, + creatingFunc: () => StreamingContext, + hadoopConf: Configuration = new Configuration(), + createOnError: Boolean = false + ): StreamingContext = { + ACTIVATION_LOCK.synchronized { + getActive().getOrElse { getOrCreate(checkpointPath, creatingFunc, hadoopConf, createOnError) } + } + } + /** * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be @@ -694,7 +750,6 @@ object StreamingContext extends Logging { checkpointOption.map(new StreamingContext(null, _, null)).getOrElse(creatingFunc()) } - /** * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be @@ -761,7 +816,7 @@ object StreamingContext extends Logging { ): SparkContext = { val conf = SparkContext.updatedConf( new SparkConf(), master, appName, sparkHome, jars, environment) - createNewSparkContext(conf) + new SparkContext(conf) } private[streaming] def rddToFileName[T](prefix: String, suffix: String, time: Time): String = { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 47299513de095..5d09b234f77ce 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -41,6 +41,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w val batchDuration = Milliseconds(500) val sparkHome = "someDir" val envPair = "key" -> "value" + val conf = new SparkConf().setMaster(master).setAppName(appName) var sc: SparkContext = null var ssc: StreamingContext = null @@ -390,23 +391,23 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w assert(newContextCreated, "new context not created") } - val corrutedCheckpointPath = createCorruptedCheckpoint() + val corruptedCheckpointPath = createCorruptedCheckpoint() // getOrCreate should throw exception with fake checkpoint file and createOnError = false intercept[Exception] { - ssc = StreamingContext.getOrCreate(corrutedCheckpointPath, creatingFunction _) + ssc = StreamingContext.getOrCreate(corruptedCheckpointPath, creatingFunction _) } // getOrCreate should throw exception with fake checkpoint file intercept[Exception] { ssc = StreamingContext.getOrCreate( - corrutedCheckpointPath, creatingFunction _, createOnError = false) + corruptedCheckpointPath, creatingFunction _, createOnError = false) } // getOrCreate should create new context with fake checkpoint file and createOnError = true testGetOrCreate { ssc = StreamingContext.getOrCreate( - corrutedCheckpointPath, creatingFunction _, createOnError = true) + corruptedCheckpointPath, creatingFunction _, createOnError = true) assert(ssc != null, "no context created") assert(newContextCreated, 
"new context not created") } @@ -491,8 +492,145 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w } } + test("getActive and getActiveOrCreate") { + require(StreamingContext.getActive().isEmpty, "context exists from before") + sc = new SparkContext(conf) + + var newContextCreated = false + + def creatingFunc(): StreamingContext = { + newContextCreated = true + val newSsc = new StreamingContext(sc, batchDuration) + val input = addInputStream(newSsc) + input.foreachRDD { rdd => rdd.count } + newSsc + } + + def testGetActiveOrCreate(body: => Unit): Unit = { + newContextCreated = false + try { + body + } finally { + + if (ssc != null) { + ssc.stop(stopSparkContext = false) + } + ssc = null + } + } + + // getActiveOrCreate should create new context and getActive should return it only + // after starting the context + testGetActiveOrCreate { + ssc = StreamingContext.getActiveOrCreate(creatingFunc _) + assert(ssc != null, "no context created") + assert(newContextCreated === true, "new context not created") + assert(StreamingContext.getActive().isEmpty, + "new initialized context returned before starting") + ssc.start() + assert(StreamingContext.getActive() === Some(ssc), + "active context not returned") + assert(StreamingContext.getActiveOrCreate(creatingFunc _) === ssc, + "active context not returned") + ssc.stop() + assert(StreamingContext.getActive().isEmpty, + "inactive context returned") + assert(StreamingContext.getActiveOrCreate(creatingFunc _) !== ssc, + "inactive context returned") + } + + // getActiveOrCreate and getActive should return independently created context after activating + testGetActiveOrCreate { + ssc = creatingFunc() // Create + assert(StreamingContext.getActive().isEmpty, + "new initialized context returned before starting") + ssc.start() + assert(StreamingContext.getActive() === Some(ssc), + "active context not returned") + assert(StreamingContext.getActiveOrCreate(creatingFunc _) === ssc, + "active context not returned") + ssc.stop() + assert(StreamingContext.getActive().isEmpty, + "inactive context returned") + } + } + + test("getActiveOrCreate with checkpoint") { + // Function to create StreamingContext that has a config to identify it to be new context + var newContextCreated = false + def creatingFunction(): StreamingContext = { + newContextCreated = true + new StreamingContext(conf, batchDuration) + } + + // Call ssc.stop after a body of code + def testGetActiveOrCreate(body: => Unit): Unit = { + require(StreamingContext.getActive().isEmpty) // no active context + newContextCreated = false + try { + body + } finally { + if (ssc != null) { + ssc.stop() + } + ssc = null + } + } + + val emptyPath = Utils.createTempDir().getAbsolutePath() + val corruptedCheckpointPath = createCorruptedCheckpoint() + val checkpointPath = createValidCheckpoint() + + // getActiveOrCreate should return the current active context if there is one + testGetActiveOrCreate { + ssc = new StreamingContext( + conf.clone.set("spark.streaming.clock", "org.apache.spark.util.ManualClock"), batchDuration) + addInputStream(ssc).register() + ssc.start() + val returnedSsc = StreamingContext.getActiveOrCreate(checkpointPath, creatingFunction _) + assert(!newContextCreated, "new context created instead of returning") + assert(returnedSsc.eq(ssc), "returned context is not the activated context") + } + + // getActiveOrCreate should create new context with empty path + testGetActiveOrCreate { + ssc = StreamingContext.getActiveOrCreate(emptyPath, creatingFunction _) + assert(ssc 
!= null, "no context created") + assert(newContextCreated, "new context not created") + } + + // getActiveOrCreate should throw exception with fake checkpoint file and createOnError = false + intercept[Exception] { + ssc = StreamingContext.getOrCreate(corruptedCheckpointPath, creatingFunction _) + } + + // getActiveOrCreate should throw exception with fake checkpoint file + intercept[Exception] { + ssc = StreamingContext.getActiveOrCreate( + corruptedCheckpointPath, creatingFunction _, createOnError = false) + } + + // getActiveOrCreate should create new context with fake + // checkpoint file and createOnError = true + testGetActiveOrCreate { + ssc = StreamingContext.getActiveOrCreate( + corruptedCheckpointPath, creatingFunction _, createOnError = true) + assert(ssc != null, "no context created") + assert(newContextCreated, "new context not created") + } + + // getActiveOrCreate should recover context with checkpoint path, and recover old configuration + testGetActiveOrCreate { + ssc = StreamingContext.getActiveOrCreate(checkpointPath, creatingFunction _) + assert(ssc != null, "no context created") + assert(!newContextCreated, "old context not recovered") + assert(ssc.conf.get("someKey") === "someValue") + } + } + test("multiple streaming contexts") { - sc = new SparkContext(new SparkConf().setMaster(master).setAppName(appName)) + sc = new SparkContext( + conf.clone.set("spark.streaming.clock", "org.apache.spark.util.ManualClock")) ssc = new StreamingContext(sc, Seconds(1)) val input = addInputStream(ssc) input.foreachRDD { rdd => rdd.count } @@ -522,9 +660,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w def createValidCheckpoint(): String = { val testDirectory = Utils.createTempDir().getAbsolutePath() val checkpointDirectory = Utils.createTempDir().getAbsolutePath() - val conf = new SparkConf().setMaster(master).setAppName(appName) - conf.set("someKey", "someValue") - ssc = new StreamingContext(conf, batchDuration) + ssc = new StreamingContext(conf.clone.set("someKey", "someValue"), batchDuration) ssc.checkpoint(checkpointDirectory) ssc.textFileStream(testDirectory).foreachRDD { rdd => rdd.count() } ssc.start() From 2713bc65af1e0e81edd5fad0338e34fd127391f9 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Tue, 12 May 2015 16:53:47 -0700 Subject: [PATCH 126/320] [SPARK-7528] [MLLIB] make RankingMetrics Java-friendly `RankingMetrics` contains a ClassTag, which is hard to create in Java. This PR adds a factory method `of` for Java users. 
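For comparison, the existing Scala entry point builds the metrics straight from an RDD of (predicted ranking, ground-truth set) pairs; the values below mirror the new Java test and assume a SparkContext `sc`:
```
import org.apache.spark.mllib.evaluation.RankingMetrics

// (predicted ids in rank order, relevant ids)
val predictionAndLabels = sc.parallelize(Seq(
  (Array(1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Array(1, 2, 3, 4, 5)),
  (Array(4, 1, 5, 6, 2, 7, 3, 8, 9, 10), Array(1, 2, 3))))

val metrics = new RankingMetrics(predictionAndLabels)
println(metrics.meanAveragePrecision)
println(metrics.precisionAt(4))
```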
coderxiang Author: Xiangrui Meng <meng@databricks.com> Closes #6098 from mengxr/SPARK-7528 and squashes the following commits: e5d57ae [Xiangrui Meng] make RankingMetrics Java-friendly --- .../mllib/evaluation/RankingMetrics.scala | 27 ++++++-- .../evaluation/JavaRankingMetricsSuite.java | 64 +++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index 93a7353e2c070..b9b54b93c27fa 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -17,11 +17,14 @@ package org.apache.spark.mllib.evaluation +import java.{lang => jl} + +import scala.collection.JavaConverters._ import scala.reflect.ClassTag import org.apache.spark.Logging -import org.apache.spark.SparkContext._ import org.apache.spark.annotation.Experimental +import org.apache.spark.api.java.{JavaSparkContext, JavaRDD} import org.apache.spark.rdd.RDD /** @@ -71,7 +74,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] logWarning("Empty ground truth set, check input data") 0.0 } - }.mean + }.mean() } /** @@ -100,7 +103,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] logWarning("Empty ground truth set, check input data") 0.0 } - }.mean + }.mean() } /** @@ -146,7 +149,23 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] logWarning("Empty ground truth set, check input data") 0.0 } - }.mean + }.mean() } } + +@Experimental +object RankingMetrics { + + /** + * Creates a [[RankingMetrics]] instance (for Java users). + * @param predictionAndLabels a JavaRDD of (predicted ranking, ground truth set) pairs + */ + def of[E, T <: jl.Iterable[E]](predictionAndLabels: JavaRDD[(T, T)]): RankingMetrics[E] = { + implicit val tag = JavaSparkContext.fakeClassTag[E] + val rdd = predictionAndLabels.rdd.map { case (predictions, labels) => + (predictions.asScala.toArray, labels.asScala.toArray) + } + new RankingMetrics(rdd) + } +} diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java new file mode 100644 index 0000000000000..effc8a1a6dabc --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.evaluation; + +import java.io.Serializable; +import java.util.ArrayList; + +import scala.Tuple2; +import scala.Tuple2$; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; + +public class JavaRankingMetricsSuite implements Serializable { + private transient JavaSparkContext sc; + private transient JavaRDD<Tuple2<ArrayList<Integer>, ArrayList<Integer>>> predictionAndLabels; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaRankingMetricsSuite"); + predictionAndLabels = sc.parallelize(Lists.newArrayList( + Tuple2$.MODULE$.apply( + Lists.newArrayList(1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Lists.newArrayList(1, 2, 3, 4, 5)), + Tuple2$.MODULE$.apply( + Lists.newArrayList(4, 1, 5, 6, 2, 7, 3, 8, 9, 10), Lists.newArrayList(1, 2, 3)), + Tuple2$.MODULE$.apply( + Lists.newArrayList(1, 2, 3, 4, 5), Lists.<Integer>newArrayList())), 2); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + } + + @Test + public void rankingMetrics() { + @SuppressWarnings("unchecked") + RankingMetrics<?> metrics = RankingMetrics.of(predictionAndLabels); + Assert.assertEquals(0.355026, metrics.meanAveragePrecision(), 1e-5); + Assert.assertEquals(0.75 / 3.0, metrics.precisionAt(4), 1e-5); + } +} From 23f7d66d51c8809ebc27bfbce3d95515e9b34c2e Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Tue, 12 May 2015 17:07:21 -0700 Subject: [PATCH 127/320] [SPARK-7554] [STREAMING] Throw exception when an active/stopped StreamingContext is used to create DStreams and output operations Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6099 from tdas/SPARK-7554 and squashes the following commits: 2cd4158 [Tathagata Das] Throw exceptions on attempts to add stuff to active and stopped contexts. 
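A small sketch of the behavior this change enforces (the socket source and configuration are illustrative, not from the patch):
```
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

val conf = new SparkConf().setMaster("local[2]").setAppName("lifecycle-check")
val ssc = new StreamingContext(conf, Seconds(1))
val lines = ssc.socketTextStream("localhost", 9999)
lines.print()

ssc.start()
// From here on, defining new inputs, transformations, or output operations throws
// SparkException("Adding new inputs, transformations, and output operations after
// starting a context is not supported"), e.g.:
// lines.map(_.length)

ssc.stop()
// The same operations after stop() fail with the "... after stopping a context ..." message.
```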
--- .../apache/spark/streaming/DStreamGraph.scala | 2 +- .../spark/streaming/dstream/DStream.scala | 21 +++++++++- .../streaming/StreamingContextSuite.scala | 39 +++++++++++++++++++ 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala index 9c7f698840778..85b354ff4aa0d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala @@ -45,7 +45,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { startTime = time outputStreams.foreach(_.initialize(zeroTime)) outputStreams.foreach(_.remember(rememberDuration)) - outputStreams.foreach(_.validate) + outputStreams.foreach(_.validateAtStart) inputStreams.par.foreach(_.start()) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index 7092a3d3f0b86..64de7526a6a34 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -60,6 +60,8 @@ abstract class DStream[T: ClassTag] ( @transient private[streaming] var ssc: StreamingContext ) extends Serializable with Logging { + validateAtInit() + // ======================================================================= // Methods that should be implemented by subclasses of DStream // ======================================================================= @@ -171,7 +173,22 @@ abstract class DStream[T: ClassTag] ( dependencies.foreach(_.initialize(zeroTime)) } - private[streaming] def validate() { + private def validateAtInit(): Unit = { + ssc.getState() match { + case StreamingContextState.INITIALIZED => + // good to go + case StreamingContextState.ACTIVE => + throw new SparkException( + "Adding new inputs, transformations, and output operations after " + + "starting a context is not supported") + case StreamingContextState.STOPPED => + throw new SparkException( + "Adding new inputs, transformations, and output operations after " + + "stopping a context is not supported") + } + } + + private[streaming] def validateAtStart() { assert(rememberDuration != null, "Remember duration is set to null") assert( @@ -226,7 +243,7 @@ abstract class DStream[T: ClassTag] ( math.ceil(rememberDuration.milliseconds / 1000.0).toInt + " seconds." 
) - dependencies.foreach(_.validate()) + dependencies.foreach(_.validateAtStart()) logInfo("Slide time = " + slideDuration) logInfo("Storage level = " + storageLevel) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 5d09b234f77ce..5f93332896de1 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -651,6 +651,45 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w testPackage.test() } + test("throw exception on using active or stopped context") { + val conf = new SparkConf() + .setMaster(master) + .setAppName(appName) + .set("spark.streaming.clock", "org.apache.spark.util.ManualClock") + ssc = new StreamingContext(conf, batchDuration) + require(ssc.getState() === StreamingContextState.INITIALIZED) + val input = addInputStream(ssc) + val transformed = input.map { x => x} + transformed.foreachRDD { rdd => rdd.count } + + def testForException(clue: String, expectedErrorMsg: String)(body: => Unit): Unit = { + withClue(clue) { + val ex = intercept[SparkException] { + body + } + assert(ex.getMessage.toLowerCase().contains(expectedErrorMsg)) + } + } + + ssc.start() + require(ssc.getState() === StreamingContextState.ACTIVE) + testForException("no error on adding input after start", "start") { + addInputStream(ssc) } + testForException("no error on adding transformation after start", "start") { + input.map { x => x * 2 } } + testForException("no error on adding output operation after start", "start") { + transformed.foreachRDD { rdd => rdd.collect() } } + + ssc.stop() + require(ssc.getState() === StreamingContextState.STOPPED) + testForException("no error on adding input after stop", "stop") { + addInputStream(ssc) } + testForException("no error on adding transformation after stop", "stop") { + input.map { x => x * 2 } } + testForException("no error on adding output operation after stop", "stop") { + transformed.foreachRDD { rdd => rdd.collect() } } + } + def addInputStream(s: StreamingContext): DStream[Int] = { val input = (1 to 100).map(i => 1 to i) val inputStream = new TestInputStream(s, input, 1) From 77f64c736d07a44f64393910d092091e8ba6047a Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Tue, 12 May 2015 17:15:39 -0700 Subject: [PATCH 128/320] [SPARK-7572] [MLLIB] do not import Param/Params under pyspark.ml Remove `Param` and `Params` from `pyspark.ml` and add a section in the doc. brkyvz Author: Xiangrui Meng <meng@databricks.com> Closes #6094 from mengxr/SPARK-7572 and squashes the following commits: 022abd6 [Xiangrui Meng] do not import Param/Params under spark.ml --- python/docs/pyspark.ml.rst | 10 +++++++++- python/pyspark/ml/__init__.py | 5 ++--- python/pyspark/ml/pipeline.py | 3 --- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/python/docs/pyspark.ml.rst b/python/docs/pyspark.ml.rst index a42217a4aee73..8379b8fc8a1e1 100644 --- a/python/docs/pyspark.ml.rst +++ b/python/docs/pyspark.ml.rst @@ -1,7 +1,7 @@ pyspark.ml package ===================== -Module Context +ML Pipeline APIs -------------- .. automodule:: pyspark.ml @@ -9,6 +9,14 @@ Module Context :undoc-members: :inherited-members: +pyspark.ml.param module +------------------------- + +.. 
automodule:: pyspark.ml.param + :members: + :undoc-members: + :inherited-members: + pyspark.ml.feature module ------------------------- diff --git a/python/pyspark/ml/__init__.py b/python/pyspark/ml/__init__.py index 47fed80f42e13..da793d9db7f91 100644 --- a/python/pyspark/ml/__init__.py +++ b/python/pyspark/ml/__init__.py @@ -15,7 +15,6 @@ # limitations under the License. # -from pyspark.ml.param import * -from pyspark.ml.pipeline import * +from pyspark.ml.pipeline import Transformer, Estimator, Model, Pipeline, PipelineModel, Evaluator -__all__ = ["Param", "Params", "Transformer", "Estimator", "Pipeline"] +__all__ = ["Transformer", "Estimator", "Model", "Pipeline", "PipelineModel", "Evaluator"] diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index fdbae06405f6a..a328bcf84a2e7 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -22,9 +22,6 @@ from pyspark.mllib.common import inherit_doc -__all__ = ['Estimator', 'Transformer', 'Pipeline', 'PipelineModel', 'Evaluator', 'Model'] - - @inherit_doc class Estimator(Params): """ From 247b70349c1e4413657359d626d92e0ffbc2b7f1 Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Wed, 13 May 2015 08:33:24 +0800 Subject: [PATCH 129/320] [HOTFIX] Use the old Job API to support old Hadoop versions #5526 uses `Job.getInstance`, which does not exist in the old Hadoop versions. Just use `new Job` to replace it. cc liancheng Author: zsxwing <zsxwing@gmail.com> Closes #6095 from zsxwing/hotfix and squashes the following commits: b0c2049 [zsxwing] Use the old Job API to support old Hadoop versions --- .../src/main/scala/org/apache/spark/sql/sources/commands.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index 127133bfaf662..8372d2c34acc7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -88,7 +88,7 @@ private[sql] case class InsertIntoFSBasedRelation( } if (doInsertion) { - val job = Job.getInstance(hadoopConf) + val job = new Job(hadoopConf) job.setOutputKeyClass(classOf[Void]) job.setOutputValueClass(classOf[Row]) FileOutputFormat.setOutputPath(job, qualifiedOutputPath) From 1b9e434b6c19f23a01e9875a3c1966cd03ce8e2d Mon Sep 17 00:00:00 2001 From: Patrick Wendell <patrick@databricks.com> Date: Tue, 12 May 2015 18:20:54 -0700 Subject: [PATCH 130/320] [SPARK-7592] Always set resolution to "Fixed" in PR merge script. The issue is that the behavior of the ASF JIRA silently changed. Now when the "Resolve Issue" transition occurs, the default resolution is "Pending Closed". We used to count on the default behavior being to set the resolution as "Fixed". The solution is to explicitly set the resolution as "Fixed" and not count on default behavior. Author: Patrick Wendell <patrick@databricks.com> Closes #6103 from pwendell/merge-script-fix and squashes the following commits: dcc16a6 [Patrick Wendell] Always set resolution to "Fixed" in PR merge script. 
--- dev/merge_spark_pr.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index b69cd15f99f63..f952c9d0b15e2 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -265,8 +265,11 @@ def get_version_json(version_str): jira_fix_versions = map(lambda v: get_version_json(v), fix_versions) resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0] + resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0] + custom_fields = {'resolution': {'id': resolution.raw['id']}} asf_jira.transition_issue( - jira_id, resolve["id"], fixVersions=jira_fix_versions, comment=comment) + jira_id, resolve["id"], fixVersions = jira_fix_versions, + comment = comment, fields = custom_fields) print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions) From 8fd55358b7fc1c7545d823bef7b39769f731c1ee Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Tue, 12 May 2015 18:37:02 -0700 Subject: [PATCH 131/320] [SPARK-7588] Document all SQL/DataFrame public methods with @since tag This pull request adds since tag to all public methods/classes in SQL/DataFrame to indicate which version the methods/classes were first added. Author: Reynold Xin <rxin@databricks.com> Closes #6101 from rxin/tbc and squashes the following commits: ed55e11 [Reynold Xin] Add since version to all DataFrame methods. --- .../java/org/apache/spark/sql/SaveMode.java | 10 ++ .../scala/org/apache/spark/sql/Column.scala | 145 ++++++++++++++++-- .../org/apache/spark/sql/DataFrame.scala | 103 +++++++++++++ .../apache/spark/sql/DataFrameHolder.scala | 2 + .../spark/sql/DataFrameNaFunctions.scala | 44 ++++++ .../spark/sql/DataFrameStatFunctions.scala | 18 +++ .../spark/sql/ExperimentalMethods.scala | 4 + .../org/apache/spark/sql/GroupedData.scala | 22 +++ .../apache/spark/sql/JavaTypeInference.scala | 3 +- .../org/apache/spark/sql/SQLContext.scala | 94 +++++++++++- .../org/apache/spark/sql/SparkSQLParser.scala | 1 - .../apache/spark/sql/UDFRegistration.scala | 49 ++++++ .../spark/sql/UserDefinedFunction.scala | 4 + .../org/apache/spark/sql/functions.scala | 140 ++++++++++++++++- .../apache/spark/sql/sources/filters.scala | 30 ++++ .../apache/spark/sql/sources/interfaces.scala | 57 +++++++ .../apache/spark/sql/hive/HiveContext.scala | 6 + 17 files changed, 706 insertions(+), 26 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java index a40be526d0d11..9665c3c46f901 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java +++ b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java @@ -18,28 +18,38 @@ /** * SaveMode is used to specify the expected behavior of saving a DataFrame to a data source. + * + * @since 1.3.0 */ public enum SaveMode { /** * Append mode means that when saving a DataFrame to a data source, if data/table already exists, * contents of the DataFrame are expected to be appended to existing data. + * + * @since 1.3.0 */ Append, /** * Overwrite mode means that when saving a DataFrame to a data source, * if data/table already exists, existing data is expected to be overwritten by the contents of * the DataFrame. + * + * @since 1.3.0 */ Overwrite, /** * ErrorIfExists mode means that when saving a DataFrame to a data source, if data already exists, * an exception is expected to be thrown. 
+ * + * @since 1.3.0 */ ErrorIfExists, /** * Ignore mode means that when saving a DataFrame to a data source, if data already exists, * the save operation is expected to not save the contents of the DataFrame and to not * change the existing data. + * + * @since 1.3.0 */ Ignore } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 4d50821620f5e..4773dedf72117 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -45,6 +45,8 @@ private[sql] object Column { * @groupname expr_ops Expression operators. * @groupname df_ops DataFrame functions. * @groupname Ungrouped Support functions for DataFrames. + * + * @since 1.3.0 */ @Experimental class Column(protected[sql] val expr: Expression) extends Logging { @@ -77,6 +79,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * of every struct in that array, and return an Array of fields * * @group expr_ops + * @since 1.4.0 */ def apply(extraction: Any): Column = UnresolvedExtractValue(expr, lit(extraction).expr) @@ -92,6 +95,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def unary_- : Column = UnaryMinus(expr) @@ -107,6 +111,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def unary_! : Column = Not(expr) @@ -122,6 +127,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def === (other: Any): Column = { val right = lit(other).expr @@ -145,6 +151,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def equalTo(other: Any): Column = this === other @@ -161,6 +168,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def !== (other: Any): Column = Not(EqualTo(expr, lit(other).expr)) @@ -177,6 +185,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def notEqual(other: Any): Column = Not(EqualTo(expr, lit(other).expr)) @@ -192,6 +201,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def > (other: Any): Column = GreaterThan(expr, lit(other).expr) @@ -207,6 +217,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def gt(other: Any): Column = this > other @@ -221,6 +232,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def < (other: Any): Column = LessThan(expr, lit(other).expr) @@ -235,6 +247,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def lt(other: Any): Column = this < other @@ -249,6 +262,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def <= (other: Any): Column = LessThanOrEqual(expr, lit(other).expr) @@ -263,6 +277,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def leq(other: Any): Column = this <= other @@ -277,6 +292,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 
*/ def >= (other: Any): Column = GreaterThanOrEqual(expr, lit(other).expr) @@ -291,6 +307,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def geq(other: Any): Column = this >= other @@ -298,6 +315,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Equality test that is safe for null values. * * @group expr_ops + * @since 1.3.0 */ def <=> (other: Any): Column = EqualNullSafe(expr, lit(other).expr) @@ -305,6 +323,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Equality test that is safe for null values. * * @group java_expr_ops + * @since 1.3.0 */ def eqNullSafe(other: Any): Column = this <=> other @@ -312,6 +331,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current column is between the lower bound and upper bound, inclusive. * * @group java_expr_ops + * @since 1.4.0 */ def between(lowerBound: Any, upperBound: Any): Column = { (this >= lowerBound) && (this <= upperBound) @@ -321,6 +341,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current expression is null. * * @group expr_ops + * @since 1.3.0 */ def isNull: Column = IsNull(expr) @@ -328,6 +349,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current expression is NOT null. * * @group expr_ops + * @since 1.3.0 */ def isNotNull: Column = IsNotNull(expr) @@ -342,6 +364,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def || (other: Any): Column = Or(expr, lit(other).expr) @@ -356,6 +379,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def or(other: Column): Column = this || other @@ -370,6 +394,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def && (other: Any): Column = And(expr, lit(other).expr) @@ -384,6 +409,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def and(other: Column): Column = this && other @@ -398,6 +424,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def + (other: Any): Column = Add(expr, lit(other).expr) @@ -412,6 +439,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def plus(other: Any): Column = this + other @@ -426,6 +454,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def - (other: Any): Column = Subtract(expr, lit(other).expr) @@ -440,6 +469,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def minus(other: Any): Column = this - other @@ -454,6 +484,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def * (other: Any): Column = Multiply(expr, lit(other).expr) @@ -468,6 +499,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def multiply(other: Any): Column = this * other @@ -482,6 +514,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def / (other: Any): Column = Divide(expr, lit(other).expr) @@ 
-496,6 +529,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def divide(other: Any): Column = this / other @@ -503,6 +537,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Modulo (a.k.a. remainder) expression. * * @group expr_ops + * @since 1.3.0 */ def % (other: Any): Column = Remainder(expr, lit(other).expr) @@ -510,6 +545,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Modulo (a.k.a. remainder) expression. * * @group java_expr_ops + * @since 1.3.0 */ def mod(other: Any): Column = this % other @@ -518,6 +554,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * by the evaluated values of the arguments. * * @group expr_ops + * @since 1.3.0 */ @scala.annotation.varargs def in(list: Column*): Column = In(expr, list.map(_.expr)) @@ -526,6 +563,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * SQL like expression. * * @group expr_ops + * @since 1.3.0 */ def like(literal: String): Column = Like(expr, lit(literal).expr) @@ -533,6 +571,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * SQL RLIKE expression (LIKE with Regex). * * @group expr_ops + * @since 1.3.0 */ def rlike(literal: String): Column = RLike(expr, lit(literal).expr) @@ -541,6 +580,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * or gets a value by key `key` in a [[MapType]]. * * @group expr_ops + * @since 1.3.0 */ def getItem(key: Any): Column = UnresolvedExtractValue(expr, Literal(key)) @@ -548,6 +588,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * An expression that gets a field by name in a [[StructType]]. * * @group expr_ops + * @since 1.3.0 */ def getField(fieldName: String): Column = UnresolvedExtractValue(expr, Literal(fieldName)) @@ -557,6 +598,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @param len expression for the length of the substring. * * @group expr_ops + * @since 1.3.0 */ def substr(startPos: Column, len: Column): Column = Substring(expr, startPos.expr, len.expr) @@ -566,6 +608,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @param len length of the substring. * * @group expr_ops + * @since 1.3.0 */ def substr(startPos: Int, len: Int): Column = Substring(expr, lit(startPos).expr, lit(len).expr) @@ -573,6 +616,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Contains the other element. * * @group expr_ops + * @since 1.3.0 */ def contains(other: Any): Column = Contains(expr, lit(other).expr) @@ -580,6 +624,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String starts with. * * @group expr_ops + * @since 1.3.0 */ def startsWith(other: Column): Column = StartsWith(expr, lit(other).expr) @@ -587,6 +632,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String starts with another string literal. * * @group expr_ops + * @since 1.3.0 */ def startsWith(literal: String): Column = this.startsWith(lit(literal)) @@ -594,6 +640,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String ends with. * * @group expr_ops + * @since 1.3.0 */ def endsWith(other: Column): Column = EndsWith(expr, lit(other).expr) @@ -601,6 +648,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String ends with another string literal. 
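import org.apache.spark.sql.DataFrame

// A hedged usage sketch, not part of the patch: a few of the Column operators
// documented in this hunk, chained together. `df` is an assumed DataFrame with
// a string column "name"; only operators shown elsewhere in this file are used.
object ColumnOpsExample {
  def sparkLikeNames(df: DataFrame): DataFrame =
    df.filter(df("name").startsWith("Sp") && df("name").endsWith("rk"))
      .select(df("name").substr(1, 5).as("prefix"))
}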
* * @group expr_ops + * @since 1.3.0 */ def endsWith(literal: String): Column = this.endsWith(lit(literal)) @@ -612,6 +660,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: String): Column = Alias(expr, alias)() @@ -623,6 +672,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: Symbol): Column = Alias(expr, alias.name)() @@ -634,6 +684,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: String, metadata: Metadata): Column = { Alias(expr, alias)(explicitMetadata = Some(metadata)) @@ -651,6 +702,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def cast(to: DataType): Column = expr match { // Lift alias out of cast so we can support col.as("name").cast(IntegerType) @@ -668,6 +720,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def cast(to: String): Column = cast(DataTypeParser.parse(to)) @@ -682,6 +735,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def desc: Column = SortOrder(expr, Descending) @@ -696,6 +750,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def asc: Column = SortOrder(expr, Ascending) @@ -703,6 +758,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Prints the expression to the console for debugging purpose. * * @group df_ops + * @since 1.3.0 */ def explain(extended: Boolean): Unit = { if (extended) { @@ -719,6 +775,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def bitwiseOR(other: Any): Column = BitwiseOr(expr, lit(other).expr) @@ -729,6 +786,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def bitwiseAND(other: Any): Column = BitwiseAnd(expr, lit(other).expr) @@ -739,6 +797,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def bitwiseXOR(other: Any): Column = BitwiseXor(expr, lit(other).expr) @@ -748,61 +807,115 @@ class Column(protected[sql] val expr: Expression) extends Logging { /** * :: Experimental :: * A convenient class used for constructing schema. + * + * @since 1.3.0 */ @Experimental class ColumnName(name: String) extends Column(name) { - /** Creates a new AttributeReference of type boolean */ + /** + * Creates a new [[StructField]] of type boolean. + * @since 1.3.0 + */ def boolean: StructField = StructField(name, BooleanType) - /** Creates a new AttributeReference of type byte */ + /** + * Creates a new [[StructField]] of type byte. + * @since 1.3.0 + */ def byte: StructField = StructField(name, ByteType) - /** Creates a new AttributeReference of type short */ + /** + * Creates a new [[StructField]] of type short. + * @since 1.3.0 + */ def short: StructField = StructField(name, ShortType) - /** Creates a new AttributeReference of type int */ + /** + * Creates a new [[StructField]] of type int. + * @since 1.3.0 + */ def int: StructField = StructField(name, IntegerType) - /** Creates a new AttributeReference of type long */ + /** + * Creates a new [[StructField]] of type long. 
+ * @since 1.3.0 + */ def long: StructField = StructField(name, LongType) - /** Creates a new AttributeReference of type float */ + /** + * Creates a new [[StructField]] of type float. + * @since 1.3.0 + */ def float: StructField = StructField(name, FloatType) - /** Creates a new AttributeReference of type double */ + /** + * Creates a new [[StructField]] of type double. + * @since 1.3.0 + */ def double: StructField = StructField(name, DoubleType) - /** Creates a new AttributeReference of type string */ + /** + * Creates a new [[StructField]] of type string. + * @since 1.3.0 + */ def string: StructField = StructField(name, StringType) - /** Creates a new AttributeReference of type date */ + /** + * Creates a new [[StructField]] of type date. + * @since 1.3.0 + */ def date: StructField = StructField(name, DateType) - /** Creates a new AttributeReference of type decimal */ + /** + * Creates a new [[StructField]] of type decimal. + * @since 1.3.0 + */ def decimal: StructField = StructField(name, DecimalType.Unlimited) - /** Creates a new AttributeReference of type decimal */ + /** + * Creates a new [[StructField]] of type decimal. + * @since 1.3.0 + */ def decimal(precision: Int, scale: Int): StructField = StructField(name, DecimalType(precision, scale)) - /** Creates a new AttributeReference of type timestamp */ + /** + * Creates a new [[StructField]] of type timestamp. + * @since 1.3.0 + */ def timestamp: StructField = StructField(name, TimestampType) - /** Creates a new AttributeReference of type binary */ + /** + * Creates a new [[StructField]] of type binary. + * @since 1.3.0 + */ def binary: StructField = StructField(name, BinaryType) - /** Creates a new AttributeReference of type array */ + /** + * Creates a new [[StructField]] of type array. + * @since 1.3.0 + */ def array(dataType: DataType): StructField = StructField(name, ArrayType(dataType)) - /** Creates a new AttributeReference of type map */ + /** + * Creates a new [[StructField]] of type map. + * @since 1.3.0 + */ def map(keyType: DataType, valueType: DataType): StructField = map(MapType(keyType, valueType)) def map(mapType: MapType): StructField = StructField(name, mapType) - /** Creates a new AttributeReference of type struct */ + /** + * Creates a new [[StructField]] of type struct. + * @since 1.3.0 + */ def struct(fields: StructField*): StructField = struct(StructType(fields)) + /** + * Creates a new [[StructField]] of type struct. + * @since 1.3.0 + */ def struct(structType: StructType): StructField = StructField(name, structType) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 01fd432cc8190..c820a673575ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -114,6 +114,7 @@ private[sql] object DataFrame { * @groupname rdd RDD Operations * @groupname output Output Operations * @groupname action Actions + * @since 1.3.0 */ // TODO: Improve documentation. @Experimental @@ -233,6 +234,7 @@ class DataFrame private[sql]( /** * Returns the object itself. * @group basic + * @since 1.3.0 */ // This is declared with parentheses to prevent the Scala compiler from treating // `rdd.toDF("1")` as invoking this toDF and then apply on the returned DataFrame. 
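The pattern SPARK-7588 applies throughout these files is uniform, so a hedged sketch may help readers who have not seen Scaladoc grouping before: every public member gains a `@group` tag (used by the doc generator to bucket methods under the `@groupname` headings declared on the class) and a `@since` tag naming the release that introduced it. The object and method below are hypothetical and exist only to show tag placement.

/**
 * Hypothetical example type, not part of any patch in this series.
 * @since 1.3.0
 */
object SinceTagExample {
  /**
   * Returns its argument unchanged.
   * @group basic
   * @since 1.3.0
   */
  def passThrough[T](value: T): T = value
}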
@@ -247,6 +249,7 @@ class DataFrame private[sql]( * rdd.toDF("id", "name") // this creates a DataFrame with column name "id" and "name" * }}} * @group basic + * @since 1.3.0 */ @scala.annotation.varargs def toDF(colNames: String*): DataFrame = { @@ -264,12 +267,14 @@ class DataFrame private[sql]( /** * Returns the schema of this [[DataFrame]]. * @group basic + * @since 1.3.0 */ def schema: StructType = queryExecution.analyzed.schema /** * Returns all column names and their data types as an array. * @group basic + * @since 1.3.0 */ def dtypes: Array[(String, String)] = schema.fields.map { field => (field.name, field.dataType.toString) @@ -278,18 +283,21 @@ class DataFrame private[sql]( /** * Returns all column names as an array. * @group basic + * @since 1.3.0 */ def columns: Array[String] = schema.fields.map(_.name) /** * Prints the schema to the console in a nice tree format. * @group basic + * @since 1.3.0 */ def printSchema(): Unit = println(schema.treeString) /** * Prints the plans (logical and physical) to the console for debugging purposes. * @group basic + * @since 1.3.0 */ def explain(extended: Boolean): Unit = { ExplainCommand( @@ -302,6 +310,7 @@ class DataFrame private[sql]( /** * Only prints the physical plan to the console for debugging purposes. * @group basic + * @since 1.3.0 */ def explain(): Unit = explain(extended = false) @@ -309,6 +318,7 @@ class DataFrame private[sql]( * Returns true if the `collect` and `take` methods can be run locally * (without any Spark executors). * @group basic + * @since 1.3.0 */ def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation] @@ -325,12 +335,14 @@ class DataFrame private[sql]( * @param numRows Number of rows to show * * @group action + * @since 1.3.0 */ def show(numRows: Int): Unit = println(showString(numRows)) /** * Displays the top 20 rows of [[DataFrame]] in a tabular form. * @group action + * @since 1.3.0 */ def show(): Unit = show(20) @@ -342,6 +354,7 @@ class DataFrame private[sql]( * }}} * * @group dfops + * @since 1.3.1 */ def na: DataFrameNaFunctions = new DataFrameNaFunctions(this) @@ -353,6 +366,7 @@ class DataFrame private[sql]( * }}} * * @group dfops + * @since 1.4.0 */ def stat: DataFrameStatFunctions = new DataFrameStatFunctions(this) @@ -363,6 +377,7 @@ class DataFrame private[sql]( * * @param right Right side of the join operation. * @group dfops + * @since 1.3.0 */ def join(right: DataFrame): DataFrame = { Join(logicalPlan, right.logicalPlan, joinType = Inner, None) @@ -386,6 +401,7 @@ class DataFrame private[sql]( * @param right Right side of the join operation. * @param usingColumn Name of the column to join on. This column must exist on both sides. * @group dfops + * @since 1.4.0 */ def join(right: DataFrame, usingColumn: String): DataFrame = { // Analyze the self join. The assumption is that the analyzer will disambiguate left vs right @@ -416,6 +432,7 @@ class DataFrame private[sql]( * df1.join(df2).where($"df1Key" === $"df2Key") * }}} * @group dfops + * @since 1.3.0 */ def join(right: DataFrame, joinExprs: Column): DataFrame = join(right, joinExprs, "inner") @@ -437,6 +454,7 @@ class DataFrame private[sql]( * @param joinExprs Join expression. * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. 
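import org.apache.spark.sql.DataFrame

// Hedged sketch, not from the patch: the join overload documented in this hunk,
// with an explicit join expression and join type. `people` and `departments`
// are assumed DataFrames; the column names are illustrative only.
object JoinExample {
  def withDepartments(people: DataFrame, departments: DataFrame): DataFrame =
    people.join(departments, people("deptId") === departments("id"), "left_outer")
}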
* @group dfops + * @since 1.3.0 */ def join(right: DataFrame, joinExprs: Column, joinType: String): DataFrame = { // Note that in this function, we introduce a hack in the case of self-join to automatically @@ -483,6 +501,7 @@ class DataFrame private[sql]( * df.sort($"sortcol".asc) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def sort(sortCol: String, sortCols: String*): DataFrame = { @@ -495,6 +514,7 @@ class DataFrame private[sql]( * df.sort($"col1", $"col2".desc) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def sort(sortExprs: Column*): DataFrame = { @@ -513,6 +533,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] sorted by the given expressions. * This is an alias of the `sort` function. * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def orderBy(sortCol: String, sortCols: String*): DataFrame = sort(sortCol, sortCols :_*) @@ -521,6 +542,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] sorted by the given expressions. * This is an alias of the `sort` function. * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def orderBy(sortExprs: Column*): DataFrame = sort(sortExprs :_*) @@ -528,12 +550,14 @@ class DataFrame private[sql]( /** * Selects column based on the column name and return it as a [[Column]]. * @group dfops + * @since 1.3.0 */ def apply(colName: String): Column = col(colName) /** * Selects column based on the column name and return it as a [[Column]]. * @group dfops + * @since 1.3.0 */ def col(colName: String): Column = colName match { case "*" => @@ -546,12 +570,14 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] with an alias set. * @group dfops + * @since 1.3.0 */ def as(alias: String): DataFrame = Subquery(alias, logicalPlan) /** * (Scala-specific) Returns a new [[DataFrame]] with an alias set. 
* @group dfops + * @since 1.3.0 */ def as(alias: Symbol): DataFrame = as(alias.name) @@ -561,6 +587,7 @@ class DataFrame private[sql]( * df.select($"colA", $"colB" + 1) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def select(cols: Column*): DataFrame = { @@ -583,6 +610,7 @@ class DataFrame private[sql]( * df.select($"colA", $"colB") * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def select(col: String, cols: String*): DataFrame = select((col +: cols).map(Column(_)) :_*) @@ -595,6 +623,7 @@ class DataFrame private[sql]( * df.selectExpr("colA", "colB as newName", "abs(colC)") * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def selectExpr(exprs: String*): DataFrame = { @@ -612,6 +641,7 @@ class DataFrame private[sql]( * peopleDf($"age" > 15) * }}} * @group dfops + * @since 1.3.0 */ def filter(condition: Column): DataFrame = Filter(condition.expr, logicalPlan) @@ -621,6 +651,7 @@ class DataFrame private[sql]( * peopleDf.filter("age > 15") * }}} * @group dfops + * @since 1.3.0 */ def filter(conditionExpr: String): DataFrame = { filter(Column(new SqlParser().parseExpression(conditionExpr))) @@ -635,6 +666,7 @@ class DataFrame private[sql]( * peopleDf($"age" > 15) * }}} * @group dfops + * @since 1.3.0 */ def where(condition: Column): DataFrame = filter(condition) @@ -653,6 +685,7 @@ class DataFrame private[sql]( * )) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr)) @@ -675,6 +708,7 @@ class DataFrame private[sql]( * )) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def groupBy(col1: String, cols: String*): GroupedData = { @@ -690,6 +724,7 @@ class DataFrame private[sql]( * df.groupBy().agg("age" -> "max", "salary" -> "avg") * }}} * @group dfops + * @since 1.3.0 */ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = { groupBy().agg(aggExpr, aggExprs :_*) @@ -703,6 +738,7 @@ class DataFrame private[sql]( * df.groupBy().agg(Map("age" -> "max", "salary" -> "avg")) * }}} * @group dfops + * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs) @@ -714,6 +750,7 @@ class DataFrame private[sql]( * df.groupBy().agg(Map("age" -> "max", "salary" -> "avg")) * }}} * @group dfops + * @since 1.3.0 */ def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs) @@ -725,6 +762,7 @@ class DataFrame private[sql]( * df.groupBy().agg(max($"age"), avg($"salary")) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs :_*) @@ -733,6 +771,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] by taking the first `n` rows. The difference between this function * and `head` is that `head` returns an array while `limit` returns a new [[DataFrame]]. * @group dfops + * @since 1.3.0 */ def limit(n: Int): DataFrame = Limit(Literal(n), logicalPlan) @@ -740,6 +779,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing union of rows in this frame and another frame. * This is equivalent to `UNION ALL` in SQL. * @group dfops + * @since 1.3.0 */ def unionAll(other: DataFrame): DataFrame = Union(logicalPlan, other.logicalPlan) @@ -747,6 +787,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing rows only in both this frame and another frame. * This is equivalent to `INTERSECT` in SQL. 
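import org.apache.spark.sql.DataFrame

// Hedged sketch, not from the patch: the set-style operators documented around
// this hunk. `customers` and `prospects` are assumed DataFrames with identical schemas.
object SetOpsExample {
  def compare(customers: DataFrame, prospects: DataFrame): (DataFrame, DataFrame, DataFrame) = {
    val everyone = customers.unionAll(prospects)   // UNION ALL semantics: keeps duplicates
    val overlap  = customers.intersect(prospects)  // rows present in both inputs
    val newOnly  = prospects.except(customers)     // rows only in prospects
    (everyone, overlap, newOnly)
  }
}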
* @group dfops + * @since 1.3.0 */ def intersect(other: DataFrame): DataFrame = Intersect(logicalPlan, other.logicalPlan) @@ -754,6 +795,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing rows in this frame but not in another frame. * This is equivalent to `EXCEPT` in SQL. * @group dfops + * @since 1.3.0 */ def except(other: DataFrame): DataFrame = Except(logicalPlan, other.logicalPlan) @@ -764,6 +806,7 @@ class DataFrame private[sql]( * @param fraction Fraction of rows to generate. * @param seed Seed for sampling. * @group dfops + * @since 1.3.0 */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): DataFrame = { Sample(0.0, fraction, withReplacement, seed, logicalPlan) @@ -775,6 +818,7 @@ class DataFrame private[sql]( * @param withReplacement Sample with replacement or not. * @param fraction Fraction of rows to generate. * @group dfops + * @since 1.3.0 */ def sample(withReplacement: Boolean, fraction: Double): DataFrame = { sample(withReplacement, fraction, Utils.random.nextLong) @@ -786,6 +830,7 @@ class DataFrame private[sql]( * @param weights weights for splits, will be normalized if they don't sum to 1. * @param seed Seed for sampling. * @group dfops + * @since 1.4.0 */ def randomSplit(weights: Array[Double], seed: Long): Array[DataFrame] = { val sum = weights.sum @@ -800,6 +845,7 @@ class DataFrame private[sql]( * * @param weights weights for splits, will be normalized if they don't sum to 1. * @group dfops + * @since 1.4.0 */ def randomSplit(weights: Array[Double]): Array[DataFrame] = { randomSplit(weights, Utils.random.nextLong) @@ -836,6 +882,7 @@ class DataFrame private[sql]( * val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title")) * }}} * @group dfops + * @since 1.3.0 */ def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = { val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType] @@ -860,6 +907,7 @@ class DataFrame private[sql]( * df.explode("words", "word")(words: String => words.split(" ")) * }}} * @group dfops + * @since 1.3.0 */ def explode[A, B : TypeTag](inputColumn: String, outputColumn: String)(f: A => TraversableOnce[B]) : DataFrame = { @@ -883,6 +931,7 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] by adding a column. * @group dfops + * @since 1.3.0 */ def withColumn(colName: String, col: Column): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -902,6 +951,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] with a column renamed. * This is a no-op if schema doesn't contain existingName. * @group dfops + * @since 1.3.0 */ def withColumnRenamed(existingName: String, newName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -921,6 +971,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] with a column dropped. * This is a no-op if schema doesn't contain column name. * @group dfops + * @since 1.4.0 */ def drop(colName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -940,6 +991,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. * This is an alias for `distinct`. * @group dfops + * @since 1.4.0 */ def dropDuplicates(): DataFrame = dropDuplicates(this.columns) @@ -948,6 +1000,7 @@ class DataFrame private[sql]( * the subset of columns. 
* * @group dfops + * @since 1.4.0 */ def dropDuplicates(colNames: Seq[String]): DataFrame = { val groupCols = colNames.map(resolve) @@ -967,6 +1020,7 @@ class DataFrame private[sql]( * the subset of columns. * * @group dfops + * @since 1.4.0 */ def dropDuplicates(colNames: Array[String]): DataFrame = dropDuplicates(colNames.toSeq) @@ -991,6 +1045,7 @@ class DataFrame private[sql]( * }}} * * @group action + * @since 1.3.1 */ @scala.annotation.varargs def describe(cols: String*): DataFrame = { @@ -1034,24 +1089,28 @@ class DataFrame private[sql]( /** * Returns the first `n` rows. * @group action + * @since 1.3.0 */ def head(n: Int): Array[Row] = limit(n).collect() /** * Returns the first row. * @group action + * @since 1.3.0 */ def head(): Row = head(1).head /** * Returns the first row. Alias for head(). * @group action + * @since 1.3.0 */ override def first(): Row = head() /** * Returns a new RDD by applying a function to all rows of this DataFrame. * @group rdd + * @since 1.3.0 */ override def map[R: ClassTag](f: Row => R): RDD[R] = rdd.map(f) @@ -1059,12 +1118,14 @@ class DataFrame private[sql]( * Returns a new RDD by first applying a function to all rows of this [[DataFrame]], * and then flattening the results. * @group rdd + * @since 1.3.0 */ override def flatMap[R: ClassTag](f: Row => TraversableOnce[R]): RDD[R] = rdd.flatMap(f) /** * Returns a new RDD by applying a function to each partition of this DataFrame. * @group rdd + * @since 1.3.0 */ override def mapPartitions[R: ClassTag](f: Iterator[Row] => Iterator[R]): RDD[R] = { rdd.mapPartitions(f) @@ -1073,42 +1134,49 @@ class DataFrame private[sql]( /** * Applies a function `f` to all rows. * @group rdd + * @since 1.3.0 */ override def foreach(f: Row => Unit): Unit = rdd.foreach(f) /** * Applies a function f to each partition of this [[DataFrame]]. * @group rdd + * @since 1.3.0 */ override def foreachPartition(f: Iterator[Row] => Unit): Unit = rdd.foreachPartition(f) /** * Returns the first `n` rows in the [[DataFrame]]. * @group action + * @since 1.3.0 */ override def take(n: Int): Array[Row] = head(n) /** * Returns an array that contains all of [[Row]]s in this [[DataFrame]]. * @group action + * @since 1.3.0 */ override def collect(): Array[Row] = queryExecution.executedPlan.executeCollect() /** * Returns a Java list that contains all of [[Row]]s in this [[DataFrame]]. * @group action + * @since 1.3.0 */ override def collectAsList(): java.util.List[Row] = java.util.Arrays.asList(rdd.collect() :_*) /** * Returns the number of rows in the [[DataFrame]]. * @group action + * @since 1.3.0 */ override def count(): Long = groupBy().count().collect().head.getLong(0) /** * Returns a new [[DataFrame]] that has exactly `numPartitions` partitions. * @group rdd + * @since 1.3.0 */ override def repartition(numPartitions: Int): DataFrame = { Repartition(numPartitions, shuffle = true, logicalPlan) @@ -1120,6 +1188,7 @@ class DataFrame private[sql]( * if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of * the 100 new partitions will claim 10 of the current partitions. * @group rdd + * @since 1.4.0 */ override def coalesce(numPartitions: Int): DataFrame = { Repartition(numPartitions, shuffle = false, logicalPlan) @@ -1129,11 +1198,13 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. * This is an alias for `dropDuplicates`. 
* @group dfops + * @since 1.3.0 */ override def distinct: DataFrame = Distinct(logicalPlan) /** * @group basic + * @since 1.3.0 */ override def persist(): this.type = { sqlContext.cacheManager.cacheQuery(this) @@ -1142,11 +1213,13 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def cache(): this.type = persist() /** * @group basic + * @since 1.3.0 */ override def persist(newLevel: StorageLevel): this.type = { sqlContext.cacheManager.cacheQuery(this, None, newLevel) @@ -1155,6 +1228,7 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def unpersist(blocking: Boolean): this.type = { sqlContext.cacheManager.tryUncacheQuery(this, blocking) @@ -1163,6 +1237,7 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def unpersist(): this.type = unpersist(blocking = false) @@ -1175,6 +1250,7 @@ class DataFrame private[sql]( * memoized. Once called, it won't change even if you change any query planning related Spark SQL * configurations (e.g. `spark.sql.shuffle.partitions`). * @group rdd + * @since 1.3.0 */ lazy val rdd: RDD[Row] = { // use a local variable to make sure the map closure doesn't capture the whole DataFrame @@ -1188,12 +1264,14 @@ class DataFrame private[sql]( /** * Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s. * @group rdd + * @since 1.3.0 */ def toJavaRDD: JavaRDD[Row] = rdd.toJavaRDD() /** * Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s. * @group rdd + * @since 1.3.0 */ def javaRDD: JavaRDD[Row] = toJavaRDD @@ -1202,6 +1280,7 @@ class DataFrame private[sql]( * temporary table is tied to the [[SQLContext]] that was used to create this DataFrame. * * @group basic + * @since 1.3.0 */ def registerTempTable(tableName: String): Unit = { sqlContext.registerDataFrameAsTable(this, tableName) @@ -1212,6 +1291,7 @@ class DataFrame private[sql]( * Files that are written out using this method can be read back in as a [[DataFrame]] * using the `parquetFile` function in [[SQLContext]]. * @group output + * @since 1.3.0 */ def saveAsParquetFile(path: String): Unit = { if (sqlContext.conf.parquetUseDataSourceApi) { @@ -1235,6 +1315,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String): Unit = { @@ -1254,6 +1335,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String, mode: SaveMode): Unit = { @@ -1281,6 +1363,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String, source: String): Unit = { @@ -1300,6 +1383,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. 
* @group output + * @since 1.3.0 */ @Experimental def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = { @@ -1319,6 +1403,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable( @@ -1340,6 +1425,7 @@ class DataFrame private[sql]( * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. * @group output + * @since 1.4.0 */ @Experimental def saveAsTable( @@ -1365,6 +1451,7 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @since 1.3.0 */ @Experimental def saveAsTable( @@ -1396,6 +1483,7 @@ class DataFrame private[sql]( * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. * @group output + * @since 1.4.0 */ @Experimental def saveAsTable( @@ -1421,6 +1509,7 @@ class DataFrame private[sql]( * using the default data source configured by spark.sql.sources.default and * [[SaveMode.ErrorIfExists]] as the save mode. * @group output + * @since 1.3.0 */ @Experimental def save(path: String): Unit = { @@ -1432,6 +1521,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode, * using the default data source configured by spark.sql.sources.default. * @group output + * @since 1.3.0 */ @Experimental def save(path: String, mode: SaveMode): Unit = { @@ -1444,6 +1534,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * using [[SaveMode.ErrorIfExists]] as the save mode. * @group output + * @since 1.3.0 */ @Experimental def save(path: String, source: String): Unit = { @@ -1455,6 +1546,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source and * [[SaveMode]] specified by mode. * @group output + * @since 1.3.0 */ @Experimental def save(path: String, source: String, mode: SaveMode): Unit = { @@ -1466,6 +1558,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options. * @group output + * @since 1.3.0 */ @Experimental def save( @@ -1480,6 +1573,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. * @group output + * @since 1.4.0 */ @Experimental def save( @@ -1496,6 +1590,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options * @group output + * @since 1.3.0 */ @Experimental def save( @@ -1510,6 +1605,7 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. 
* @group output + * @since 1.4.0 */ @Experimental def save( @@ -1524,6 +1620,7 @@ class DataFrame private[sql]( * :: Experimental :: * Adds the rows from this RDD to the specified table, optionally overwriting the existing data. * @group output + * @since 1.3.0 */ @Experimental def insertInto(tableName: String, overwrite: Boolean): Unit = { @@ -1536,6 +1633,7 @@ class DataFrame private[sql]( * Adds the rows from this RDD to the specified table. * Throws an exception if the table already exists. * @group output + * @since 1.3.0 */ @Experimental def insertInto(tableName: String): Unit = insertInto(tableName, overwrite = false) @@ -1543,6 +1641,7 @@ class DataFrame private[sql]( /** * Returns the content of the [[DataFrame]] as a RDD of JSON strings. * @group rdd + * @since 1.3.0 */ def toJSON: RDD[String] = { val rowSchema = this.schema @@ -1581,6 +1680,7 @@ class DataFrame private[sql]( * given name; if you pass `false`, it will throw if the table already * exists. * @group output + * @since 1.3.0 */ def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = { createJDBCTable(url, table, allowExisting, new Properties()) @@ -1594,6 +1694,7 @@ class DataFrame private[sql]( * given name; if you pass `false`, it will throw if the table already * exists. * @group output + * @since 1.4.0 */ def createJDBCTable( url: String, @@ -1626,6 +1727,7 @@ class DataFrame private[sql]( * the RDD in order via the simple statement * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. * @group output + * @since 1.3.0 */ def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = { insertIntoJDBC(url, table, overwrite, new Properties()) @@ -1643,6 +1745,7 @@ class DataFrame private[sql]( * the RDD in order via the simple statement * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. * @group output + * @since 1.4.0 */ def insertIntoJDBC( url: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala index a3187fe3230fd..b87efb58d51e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql /** * A container for a [[DataFrame]], used for implicit conversions. + * + * @since 1.3.0 */ private[sql] case class DataFrameHolder(df: DataFrame) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 4a54120ba86f6..b4c2daa055868 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -30,12 +30,16 @@ import org.apache.spark.sql.types._ /** * :: Experimental :: * Functionality for working with missing data in [[DataFrame]]s. + * + * @since 1.3.1 */ @Experimental final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that drops rows containing any null values. + * + * @since 1.3.1 */ def drop(): DataFrame = drop("any", df.columns) @@ -44,18 +48,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values. * If `how` is "all", then drop rows only if every column is null for that row. 
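import org.apache.spark.sql.DataFrame

// Hedged sketch, not from the patch: the missing-data helpers documented in this
// file, chained together. `people` is an assumed DataFrame with nullable "age"
// (numeric) and "name" (string) columns.
object NaExample {
  def clean(people: DataFrame): DataFrame =
    people.na.drop("all")               // keep rows with at least one non-null value
      .na.fill(0.0, Seq("age"))         // replace null ages with 0.0
      .na.fill("unknown", Seq("name"))  // replace null names with a placeholder
}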
+ * + * @since 1.3.1 */ def drop(how: String): DataFrame = drop(how, df.columns) /** * Returns a new [[DataFrame]] that drops rows containing any null values * in the specified columns. + * + * @since 1.3.1 */ def drop(cols: Array[String]): DataFrame = drop(cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame ]] that drops rows containing any null values * in the specified columns. + * + * @since 1.3.1 */ def drop(cols: Seq[String]): DataFrame = drop(cols.size, cols) @@ -65,6 +75,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values in the specified columns. * If `how` is "all", then drop rows only if every specified column is null for that row. + * + * @since 1.3.1 */ def drop(how: String, cols: Array[String]): DataFrame = drop(how, cols.toSeq) @@ -74,6 +86,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values in the specified columns. * If `how` is "all", then drop rows only if every specified column is null for that row. + * + * @since 1.3.1 */ def drop(how: String, cols: Seq[String]): DataFrame = { how.toLowerCase match { @@ -85,18 +99,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null values. + * + * @since 1.3.1 */ def drop(minNonNulls: Int): DataFrame = drop(minNonNulls, df.columns) /** * Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null * values in the specified columns. + * + * @since 1.3.1 */ def drop(minNonNulls: Int, cols: Array[String]): DataFrame = drop(minNonNulls, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that drops rows containing less than * `minNonNulls` non-null values in the specified columns. + * + * @since 1.3.1 */ def drop(minNonNulls: Int, cols: Seq[String]): DataFrame = { // Filtering condition -- only keep the row if it has at least `minNonNulls` non-null values. @@ -106,23 +126,31 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that replaces null values in numeric columns with `value`. + * + * @since 1.3.1 */ def fill(value: Double): DataFrame = fill(value, df.columns) /** * Returns a new [[DataFrame ]] that replaces null values in string columns with `value`. + * + * @since 1.3.1 */ def fill(value: String): DataFrame = fill(value, df.columns) /** * Returns a new [[DataFrame]] that replaces null values in specified numeric columns. * If a specified column is not a numeric column, it is ignored. + * + * @since 1.3.1 */ def fill(value: Double, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that replaces null values in specified * numeric columns. If a specified column is not a numeric column, it is ignored. + * + * @since 1.3.1 */ def fill(value: Double, cols: Seq[String]): DataFrame = { val columnEquals = df.sqlContext.analyzer.resolver @@ -140,12 +168,16 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that replaces null values in specified string columns. * If a specified column is not a string column, it is ignored. + * + * @since 1.3.1 */ def fill(value: String, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that replaces null values in * specified string columns. 
If a specified column is not a string column, it is ignored. + * + * @since 1.3.1 */ def fill(value: String, cols: Seq[String]): DataFrame = { val columnEquals = df.sqlContext.analyzer.resolver @@ -172,6 +204,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * import com.google.common.collect.ImmutableMap; * df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0)); * }}} + * + * @since 1.3.1 */ def fill(valueMap: java.util.Map[String, Any]): DataFrame = fill0(valueMap.toSeq) @@ -189,6 +223,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * "B" -> 1.0 * )) * }}} + * + * @since 1.3.1 */ def fill(valueMap: Map[String, Any]): DataFrame = fill0(valueMap.toSeq) @@ -212,6 +248,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param col name of the column to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](col: String, replacement: java.util.Map[T, T]): DataFrame = { replace[T](col, replacement.toMap : Map[T, T]) @@ -233,6 +271,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param cols list of columns to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](cols: Array[String], replacement: java.util.Map[T, T]): DataFrame = { replace(cols.toSeq, replacement.toMap) @@ -256,6 +296,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param col name of the column to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](col: String, replacement: Map[T, T]): DataFrame = { if (col == "*") { @@ -279,6 +321,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param cols list of columns to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](cols: Seq[String], replacement: Map[T, T]): DataFrame = replace0(cols, replacement) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index a1e74470afc89..5d106c1ac2674 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -23,6 +23,8 @@ import org.apache.spark.sql.execution.stat._ /** * :: Experimental :: * Statistic functions for [[DataFrame]]s. + * + * @since 1.4.0 */ @Experimental final class DataFrameStatFunctions private[sql](df: DataFrame) { @@ -32,6 +34,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the first column * @param col2 the name of the second column * @return the covariance of the two columns. + * + * @since 1.4.0 */ def cov(col1: String, col2: String): Double = { StatFunctions.calculateCov(df, Seq(col1, col2)) @@ -45,6 +49,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the column * @param col2 the name of the column to calculate the correlation against * @return The Pearson Correlation Coefficient as a Double. 
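import org.apache.spark.sql.DataFrame

// Hedged sketch, not from the patch: the statistics helpers documented in this
// file. `sales` is an assumed DataFrame with numeric "price" and "quantity"
// columns and a categorical "region" column.
object StatExample {
  def summarize(sales: DataFrame): Unit = {
    val covariance  = sales.stat.cov("price", "quantity")
    val correlation = sales.stat.corr("price", "quantity")   // Pearson
    println(s"cov=$covariance, corr=$correlation")
    sales.stat.crosstab("region", "quantity").show()         // contingency table
    sales.stat.freqItems(Seq("region"), 0.4).show()          // frequent items
  }
}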
+ * + * @since 1.4.0 */ def corr(col1: String, col2: String, method: String): Double = { require(method == "pearson", "Currently only the calculation of the Pearson Correlation " + @@ -58,6 +64,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the column * @param col2 the name of the column to calculate the correlation against * @return The Pearson Correlation Coefficient as a Double. + * + * @since 1.4.0 */ def corr(col1: String, col2: String): Double = { corr(col1, col2, "pearson") @@ -76,6 +84,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col2 The name of the second column. Distinct items will make the column names * of the DataFrame. * @return A DataFrame containing for the contingency table. + * + * @since 1.4.0 */ def crosstab(col1: String, col2: String): DataFrame = { StatFunctions.crossTabulate(df, col1, col2) @@ -91,6 +101,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param support The minimum frequency for an item to be considered `frequent`. Should be greater * than 1e-4. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Array[String], support: Double): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, support) @@ -104,6 +116,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Array[String]): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, 0.01) @@ -116,6 +130,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Seq[String], support: Double): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, support) @@ -129,6 +145,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Seq[String]): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, 0.01) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala index d5d7e35a6b35d..717709e4f9312 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala @@ -27,6 +27,8 @@ import org.apache.spark.annotation.Experimental * {{{ * sqlContext.experimental.extraStrategies += ... * }}} + * + * @since 1.3.0 */ @Experimental class ExperimentalMethods protected[sql](sqlContext: SQLContext) { @@ -34,6 +36,8 @@ class ExperimentalMethods protected[sql](sqlContext: SQLContext) { /** * Allows extra strategies to be injected into the query planner at runtime. Note this API * should be consider experimental and is not intended to be stable across releases. 
+ * + * @since 1.3.0 */ @Experimental var extraStrategies: Seq[Strategy] = Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 543320e471bf7..1381b9f1a6080 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -30,6 +30,8 @@ import org.apache.spark.sql.types.NumericType /** * :: Experimental :: * A set of methods for aggregations on a [[DataFrame]], created by [[DataFrame.groupBy]]. + * + * @since 1.3.0 */ @Experimental class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) { @@ -94,6 +96,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * "expense" -> "sum" * ) * }}} + * + * @since 1.3.0 */ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = { agg((aggExpr +: aggExprs).toMap) @@ -111,6 +115,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * "expense" -> "sum" * )) * }}} + * + * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = { exprs.map { case (colName, expr) => @@ -129,6 +135,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * import com.google.common.collect.ImmutableMap; * df.groupBy("department").agg(ImmutableMap.of("age", "max", "expense", "sum")); * }}} + * + * @since 1.3.0 */ def agg(exprs: java.util.Map[String, String]): DataFrame = { agg(exprs.toMap) @@ -162,6 +170,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * // Java, 1.3.x: * df.groupBy("department").agg(col("department"), max("age"), sum("expense")); * }}} + * + * @since 1.3.0 */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = { @@ -183,6 +193,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) /** * Count the number of rows for each group. * The resulting [[DataFrame]] will also contain the grouping columns. + * + * @since 1.3.0 */ def count(): DataFrame = Seq(Alias(Count(Literal(1)), "count")()) @@ -190,6 +202,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the average value for each numeric columns for each group. This is an alias for `avg`. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the average values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def mean(colNames: String*): DataFrame = { @@ -200,6 +214,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the max value for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the max values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def max(colNames: String*): DataFrame = { @@ -210,6 +226,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the mean value for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the mean values for them. 
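import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{avg, max}

// Hedged sketch, not from the patch: the grouped aggregations documented in this
// file. `employees` is an assumed DataFrame with "department", "age" and
// "salary" columns.
object GroupedExample {
  def departmentStats(employees: DataFrame): DataFrame =
    employees.groupBy("department").agg(max("age"), avg("salary"))
}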
+ * + * @since 1.3.0 */ @scala.annotation.varargs def avg(colNames: String*): DataFrame = { @@ -220,6 +238,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the min value for each numeric column for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the min values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def min(colNames: String*): DataFrame = { @@ -230,6 +250,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * Compute the sum for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the sum for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def sum(colNames: String*): DataFrame = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala b/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala index db484c5f50074..1ec874f79617c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala @@ -21,11 +21,12 @@ import java.beans.Introspector import java.lang.{Iterable => JIterable} import java.util.{Iterator => JIterator, Map => JMap} +import scala.language.existentials + import com.google.common.reflect.TypeToken import org.apache.spark.sql.types._ -import scala.language.existentials /** * Type-inference utilities for POJOs and Java collections. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 70ba8985d6342..975498c11fa23 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -95,6 +95,8 @@ private[spark] class DefaultParserDialect extends ParserDialect { * @groupname config Configuration * @groupname dataframes Custom DataFrame Creation * @groupname Ungrouped Support functions for language integrated queries. + * + * @since 1.0.0 */ class SQLContext(@transient val sparkContext: SparkContext) extends org.apache.spark.Logging @@ -113,6 +115,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Set Spark SQL configuration properties. * * @group config + * @since 1.0.0 */ def setConf(props: Properties): Unit = conf.setConf(props) @@ -120,6 +123,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Set the given Spark SQL configuration property. * * @group config + * @since 1.0.0 */ def setConf(key: String, value: String): Unit = conf.setConf(key, value) @@ -127,6 +131,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Return the value of Spark SQL configuration property for the given key. * * @group config + * @since 1.0.0 */ def getConf(key: String): String = conf.getConf(key) @@ -135,6 +140,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * yet, return `defaultValue`. * * @group config + * @since 1.0.0 */ def getConf(key: String, defaultValue: String): String = conf.getConf(key, defaultValue) @@ -143,6 +149,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * This creates a new copy of the config properties in the form of a Map. 
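As a quick illustration of the SQLContext configuration setters and getters tagged above (the keys shown are ordinary Spark SQL properties; any string key/value pair works):

{{{
sqlContext.setConf("spark.sql.shuffle.partitions", "10")
sqlContext.getConf("spark.sql.shuffle.partitions")   // "10"
sqlContext.getConf("spark.sql.codegen", "false")     // falls back to the supplied default
}}}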
* * @group config + * @since 1.0.0 */ def getAllConfs: immutable.Map[String, String] = conf.getAllConfs @@ -228,6 +235,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * the query planner for advanced functionality. * * @group basic + * @since 1.3.0 */ @Experimental @transient @@ -238,6 +246,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns a [[DataFrame]] with no rows or columns. * * @group basic + * @since 1.3.0 */ @Experimental @transient @@ -270,6 +279,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group basic + * @since 1.3.0 * TODO move to SQLSession? */ @transient @@ -278,23 +288,27 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * Returns true if the table is currently cached in-memory. * @group cachemgmt + * @since 1.3.0 */ def isCached(tableName: String): Boolean = cacheManager.isCached(tableName) /** * Caches the specified table in-memory. * @group cachemgmt + * @since 1.3.0 */ def cacheTable(tableName: String): Unit = cacheManager.cacheTable(tableName) /** * Removes the specified table from the in-memory cache. * @group cachemgmt + * @since 1.3.0 */ def uncacheTable(tableName: String): Unit = cacheManager.uncacheTable(tableName) /** * Removes all cached tables from the in-memory cache. + * @since 1.3.0 */ def clearCache(): Unit = cacheManager.clearCache() @@ -311,27 +325,40 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group basic + * @since 1.3.0 */ @Experimental object implicits extends Serializable { // scalastyle:on - /** Converts $"col name" into an [[Column]]. */ + /** + * Converts $"col name" into an [[Column]]. + * @since 1.3.0 + */ implicit class StringToColumn(val sc: StringContext) { def $(args: Any*): ColumnName = { new ColumnName(sc.s(args :_*)) } } - /** An implicit conversion that turns a Scala `Symbol` into a [[Column]]. */ + /** + * An implicit conversion that turns a Scala `Symbol` into a [[Column]]. + * @since 1.3.0 + */ implicit def symbolToColumn(s: Symbol): ColumnName = new ColumnName(s.name) - /** Creates a DataFrame from an RDD of case classes or tuples. */ + /** + * Creates a DataFrame from an RDD of case classes or tuples. + * @since 1.3.0 + */ implicit def rddToDataFrameHolder[A <: Product : TypeTag](rdd: RDD[A]): DataFrameHolder = { DataFrameHolder(self.createDataFrame(rdd)) } - /** Creates a DataFrame from a local Seq of Product. */ + /** + * Creates a DataFrame from a local Seq of Product. + * @since 1.3.0 + */ implicit def localSeqToDataFrameHolder[A <: Product : TypeTag](data: Seq[A]): DataFrameHolder = { DataFrameHolder(self.createDataFrame(data)) @@ -341,7 +368,10 @@ class SQLContext(@transient val sparkContext: SparkContext) // making existing implicit conversions ambiguous. In particular, RDD[Double] is dangerous // because of [[DoubleRDDFunctions]]. - /** Creates a single column DataFrame from an RDD[Int]. */ + /** + * Creates a single column DataFrame from an RDD[Int]. + * @since 1.3.0 + */ implicit def intRddToDataFrameHolder(data: RDD[Int]): DataFrameHolder = { val dataType = IntegerType val rows = data.mapPartitions { iter => @@ -354,7 +384,10 @@ class SQLContext(@transient val sparkContext: SparkContext) DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil))) } - /** Creates a single column DataFrame from an RDD[Long]. */ + /** + * Creates a single column DataFrame from an RDD[Long]. 
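The implicits documented above are typically used as below; this sketch assumes a SparkContext `sc`, a SQLContext `sqlContext`, and a top-level case class Person(name: String, age: Int):

{{{
import sqlContext.implicits._

val people = sc.parallelize(Seq(Person("Alice", 29), Person("Bob", 31))).toDF()
people.select($"name", 'age + 1).show()   // StringToColumn and symbolToColumn in action
}}}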
+ * @since 1.3.0 + */ implicit def longRddToDataFrameHolder(data: RDD[Long]): DataFrameHolder = { val dataType = LongType val rows = data.mapPartitions { iter => @@ -367,7 +400,10 @@ class SQLContext(@transient val sparkContext: SparkContext) DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil))) } - /** Creates a single column DataFrame from an RDD[String]. */ + /** + * Creates a single column DataFrame from an RDD[String]. + * @since 1.3.0 + */ implicit def stringRddToDataFrameHolder(data: RDD[String]): DataFrameHolder = { val dataType = StringType val rows = data.mapPartitions { iter => @@ -386,6 +422,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Creates a DataFrame from an RDD of case classes. * * @group dataframes + * @since 1.3.0 */ @Experimental def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { @@ -401,6 +438,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Creates a DataFrame from a local Seq of Product. * * @group dataframes + * @since 1.3.0 */ @Experimental def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = { @@ -414,6 +452,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]]. * * @group dataframes + * @since 1.3.0 */ def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = { DataFrame(this, LogicalRelation(baseRelation)) @@ -449,6 +488,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group dataframes + * @since 1.3.0 */ @DeveloperApi def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = { @@ -480,6 +520,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * the provided schema. Otherwise, there will be runtime exception. * * @group dataframes + * @since 1.3.0 */ @DeveloperApi def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { @@ -492,6 +533,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. * @group dataframes + * @since 1.3.0 */ def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = { val attributeSeq = getSchema(beanClass) @@ -520,6 +562,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. * @group dataframes + * @since 1.3.0 */ def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { createDataFrame(rdd.rdd, beanClass) @@ -591,6 +634,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * [[DataFrame]] if no paths are passed in. * * @group specificdata + * @since 1.3.0 */ @scala.annotation.varargs def parquetFile(paths: String*): DataFrame = { @@ -609,6 +653,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It goes through the entire dataset once to determine the schema. * * @group specificdata + * @since 1.3.0 */ def jsonFile(path: String): DataFrame = jsonFile(path, 1.0) @@ -618,6 +663,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * returning the result as a [[DataFrame]]. 
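A minimal example of createDataFrame with an explicit schema, matching the RDD[Row] overload documented above (sc and sqlContext are assumed to exist):

{{{
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}

val schema = StructType(Seq(
  StructField("name", StringType, nullable = true),
  StructField("age", IntegerType, nullable = true)))
val rowRDD = sc.parallelize(Seq(Row("Alice", 29), Row("Bob", 31)))
val people = sqlContext.createDataFrame(rowRDD, schema)
}}}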
* * @group specificdata + * @since 1.3.0 */ @Experimental def jsonFile(path: String, schema: StructType): DataFrame = @@ -626,6 +672,7 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * :: Experimental :: * @group specificdata + * @since 1.3.0 */ @Experimental def jsonFile(path: String, samplingRatio: Double): DataFrame = @@ -637,6 +684,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It goes through the entire dataset once to determine the schema. * * @group specificdata + * @since 1.3.0 */ def jsonRDD(json: RDD[String]): DataFrame = jsonRDD(json, 1.0) @@ -647,6 +695,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It goes through the entire dataset once to determine the schema. * * @group specificdata + * @since 1.3.0 */ def jsonRDD(json: JavaRDD[String]): DataFrame = jsonRDD(json.rdd, 1.0) @@ -656,6 +705,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: RDD[String], schema: StructType): DataFrame = { @@ -678,6 +728,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * schema, returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = { @@ -690,6 +741,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * schema, returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = { @@ -711,6 +763,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * schema, returning the result as a [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = { @@ -723,6 +776,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * using the default data source configured by spark.sql.sources.default. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(path: String): DataFrame = { @@ -735,6 +789,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the dataset stored at path as a DataFrame, using the given data source. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(path: String, source: String): DataFrame = { @@ -747,6 +802,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(source: String, options: java.util.Map[String, String]): DataFrame = { @@ -759,6 +815,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame. * * @group genericdata + * @since 1.3.0 */ @Experimental def load(source: String, options: Map[String, String]): DataFrame = { @@ -772,6 +829,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -787,6 +845,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. 
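The JSON readers tagged above can be exercised as follows; the file path is a placeholder and the inline JSON string is illustrative:

{{{
val fromFile = sqlContext.jsonFile("examples/src/main/resources/people.json")
fromFile.printSchema()

val jsonRdd = sc.parallelize("""{"name":"Yin","address":{"city":"Columbus"}}""" :: Nil)
val fromRdd = sqlContext.jsonRDD(jsonRdd)
}}}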
* * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -802,6 +861,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * (Scala-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -817,6 +877,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * (Scala-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * @group genericdata + * @since 1.3.0 */ @Experimental def load( @@ -834,6 +895,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It will use the default data source configured by spark.sql.sources.default. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable(tableName: String, path: String): DataFrame = { @@ -847,6 +909,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * and returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -862,6 +925,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -878,6 +942,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -903,6 +968,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -920,6 +986,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -946,6 +1013,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * url named table. * * @group specificdata + * @since 1.3.0 */ @Experimental def jdbc(url: String, table: String): DataFrame = { @@ -958,6 +1026,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * url named table and connection properties. * * @group specificdata + * @since 1.4.0 */ @Experimental def jdbc(url: String, table: String, properties: Properties): DataFrame = { @@ -976,6 +1045,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split * evenly into this many partitions * @group specificdata + * @since 1.3.0 */ @Experimental def jdbc( @@ -1001,6 +1071,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * evenly into this many partitions * @param properties connection properties * @group specificdata + * @since 1.4.0 */ @Experimental def jdbc( @@ -1024,6 +1095,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * of the [[DataFrame]]. * * @group specificdata + * @since 1.3.0 */ @Experimental def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { @@ -1038,6 +1110,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * of the [[DataFrame]]. 
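A rough sketch of the generic load and JDBC entry points documented above; the paths, JDBC URL, and table name are placeholders, and the JDBC driver is assumed to be on the classpath:

{{{
val users  = sqlContext.load("examples/src/main/resources/users.parquet")        // default source
val people = sqlContext.load("examples/src/main/resources/people.json", "json")  // named source
val orders = sqlContext.jdbc("jdbc:postgresql://dbhost/mydb", "orders")
}}}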
* * @group specificdata + * @since 1.4.0 */ @Experimental def jdbc( @@ -1075,6 +1148,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * @param tableName the name of the table to be unregistered. * * @group basic + * @since 1.3.0 */ def dropTempTable(tableName: String): Unit = { cacheManager.tryUncacheQuery(table(tableName)) @@ -1086,6 +1160,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * used for SQL parsing can be configured with 'spark.sql.dialect'. * * @group basic + * @since 1.3.0 */ def sql(sqlText: String): DataFrame = { DataFrame(this, parseSql(sqlText)) @@ -1095,6 +1170,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the specified table as a [[DataFrame]]. * * @group ddl_ops + * @since 1.3.0 */ def table(tableName: String): DataFrame = DataFrame(this, catalog.lookupRelation(Seq(tableName))) @@ -1105,6 +1181,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * indicating if a table is a temporary one or not). * * @group ddl_ops + * @since 1.3.0 */ def tables(): DataFrame = { DataFrame(this, ShowTablesCommand(None)) @@ -1116,6 +1193,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * indicating if a table is a temporary one or not). * * @group ddl_ops + * @since 1.3.0 */ def tables(databaseName: String): DataFrame = { DataFrame(this, ShowTablesCommand(Some(databaseName))) @@ -1125,6 +1203,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the names of tables in the current database as an array. * * @group ddl_ops + * @since 1.3.0 */ def tableNames(): Array[String] = { catalog.getTables(None).map { @@ -1136,6 +1215,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the names of tables in the given database as an array. * * @group ddl_ops + * @since 1.3.0 */ def tableNames(databaseName: String): Array[String] = { catalog.getTables(Some(databaseName)).map { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala index 5921eaf5e63f4..6b1ae81972e4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql - import scala.util.parsing.combinator.RegexParsers import org.apache.spark.sql.catalyst.AbstractSparkSQLParser diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index b97aaf73529a3..dc3389c41bbfa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -33,6 +33,8 @@ import org.apache.spark.sql.types.DataType /** * Functions for registering user-defined functions. Use [[SQLContext.udf]] to access this. + * + * @since 1.3.0 */ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { @@ -87,6 +89,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of ${x} arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
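The table-oriented methods annotated above fit together roughly like this (a DataFrame `df` with name/age columns is assumed):

{{{
df.registerTempTable("people")
val adults = sqlContext.sql("SELECT name FROM people WHERE age >= 21")
val same   = sqlContext.table("people")    // the same data back as a DataFrame
sqlContext.tableNames().foreach(println)   // includes "people"
sqlContext.dropTempTable("people")
}}}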
+ * @since 1.3.0 */ def register[$typeTags](name: String, func: Function$x[$types]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -104,6 +107,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { println(s""" |/** | * Register a user-defined function with ${i} arguments. + | * @since 1.3.0 | */ |def register(name: String, f: UDF$i[$extTypeArgs, _], returnType: DataType) = { | functionRegistry.registerFunction( @@ -116,6 +120,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 0 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag](name: String, func: Function0[RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -127,6 +132,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 1 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag](name: String, func: Function1[A1, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -138,6 +144,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 2 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag](name: String, func: Function2[A1, A2, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -149,6 +156,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 3 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](name: String, func: Function3[A1, A2, A3, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -160,6 +168,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 4 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](name: String, func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -171,6 +180,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 5 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](name: String, func: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -182,6 +192,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 6 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
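A small sketch of Scala-closure UDF registration as documented above; the function names, the "people" temp table, and the name column are illustrative:

{{{
sqlContext.udf.register("strLen", (s: String) => s.length)
sqlContext.sql("SELECT strLen(name) FROM people").show()

// The returned handle can also be used directly with the DataFrame API.
val strLen = sqlContext.udf.register("strLenUdf", (s: String) => s.length)
df.select(strLen(df("name")))
}}}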
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](name: String, func: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -193,6 +204,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 7 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](name: String, func: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -204,6 +216,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 8 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](name: String, func: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -215,6 +228,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 9 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](name: String, func: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -226,6 +240,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 10 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](name: String, func: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -237,6 +252,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 11 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag](name: String, func: Function11[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -248,6 +264,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 12 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag](name: String, func: Function12[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -259,6 +276,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 13 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag](name: String, func: Function13[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -270,6 +288,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 14 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag](name: String, func: Function14[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -281,6 +300,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 15 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag](name: String, func: Function15[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -292,6 +312,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 16 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag](name: String, func: Function16[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -303,6 +324,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 17 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag](name: String, func: Function17[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -314,6 +336,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 18 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag](name: String, func: Function18[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -325,6 +348,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 19 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag](name: String, func: Function19[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -336,6 +360,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 20 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag](name: String, func: Function20[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -347,6 +372,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 21 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag](name: String, func: Function21[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -358,6 +384,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 22 arguments as user-defined function (UDF). 
* @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag, A22: TypeTag](name: String, func: Function22[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -371,6 +398,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 1 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF1[_, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -380,6 +408,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 2 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF2[_, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -389,6 +418,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 3 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF3[_, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -398,6 +428,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 4 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -407,6 +438,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 5 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -416,6 +448,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 6 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -425,6 +458,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 7 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -434,6 +468,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 8 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -443,6 +478,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 9 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -452,6 +488,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 10 arguments. 
+ * @since 1.3.0 */ def register(name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -461,6 +498,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 11 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -470,6 +508,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 12 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -479,6 +518,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 13 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -488,6 +528,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 14 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -497,6 +538,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 15 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -506,6 +548,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 16 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -515,6 +558,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 17 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -524,6 +568,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 18 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -533,6 +578,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 19 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -542,6 +588,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 20 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -551,6 +598,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 21 arguments. 
+ * @since 1.3.0 */ def register(name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -560,6 +608,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 22 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala index 295db539adfc4..505ab1301ec96 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql import java.util.{List => JList, Map => JMap} import org.apache.spark.Accumulator +import org.apache.spark.annotation.Experimental import org.apache.spark.api.python.PythonBroadcast import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.expressions.ScalaUdf @@ -36,7 +37,10 @@ import org.apache.spark.sql.types.DataType * // Projects a column that adds a prediction column based on the score column. * df.select( predict(df("score")) ) * }}} + * + * @since 1.3.0 */ +@Experimental case class UserDefinedFunction protected[sql] (f: AnyRef, dataType: DataType) { def apply(exprs: Column*): Column = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index fae4bd0fd2994..215787e40bf0a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -38,6 +38,7 @@ import org.apache.spark.util.Utils * @groupname normal_funcs Non-aggregate functions * @groupname math_funcs Math functions * @groupname Ungrouped Support functions for DataFrames. + * @since 1.3.0 */ @Experimental // scalastyle:off @@ -50,6 +51,7 @@ object functions { * Returns a [[Column]] based on the given column name. * * @group normal_funcs + * @since 1.3.0 */ def col(colName: String): Column = Column(colName) @@ -57,6 +59,7 @@ object functions { * Returns a [[Column]] based on the given column name. Alias of [[col]]. * * @group normal_funcs + * @since 1.3.0 */ def column(colName: String): Column = Column(colName) @@ -68,6 +71,7 @@ object functions { * Otherwise, a new [[Column]] is created to represent the literal value. * * @group normal_funcs + * @since 1.3.0 */ def lit(literal: Any): Column = { literal match { @@ -92,6 +96,7 @@ object functions { * }}} * * @group sort_funcs + * @since 1.3.0 */ def asc(columnName: String): Column = Column(columnName).asc @@ -103,6 +108,7 @@ object functions { * }}} * * @group sort_funcs + * @since 1.3.0 */ def desc(columnName: String): Column = Column(columnName).desc @@ -114,6 +120,7 @@ object functions { * Aggregate function: returns the sum of all values in the expression. * * @group agg_funcs + * @since 1.3.0 */ def sum(e: Column): Column = Sum(e.expr) @@ -121,6 +128,7 @@ object functions { * Aggregate function: returns the sum of all values in the given column. * * @group agg_funcs + * @since 1.3.0 */ def sum(columnName: String): Column = sum(Column(columnName)) @@ -128,6 +136,7 @@ object functions { * Aggregate function: returns the sum of distinct values in the expression. 
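For orientation, a few of the basic column and aggregate helpers in `functions` tagged above, applied to an assumed DataFrame `df` with name/age/expense columns:

{{{
import org.apache.spark.sql.functions._

df.select(col("name"), lit(1).as("one"))
df.orderBy(asc("age"), desc("name"))
df.agg(sum("expense"), count("*"))
}}}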
* * @group agg_funcs + * @since 1.3.0 */ def sumDistinct(e: Column): Column = SumDistinct(e.expr) @@ -135,6 +144,7 @@ object functions { * Aggregate function: returns the sum of distinct values in the expression. * * @group agg_funcs + * @since 1.3.0 */ def sumDistinct(columnName: String): Column = sumDistinct(Column(columnName)) @@ -142,6 +152,7 @@ object functions { * Aggregate function: returns the number of items in a group. * * @group agg_funcs + * @since 1.3.0 */ def count(e: Column): Column = e.expr match { // Turn count(*) into count(1) @@ -153,6 +164,7 @@ object functions { * Aggregate function: returns the number of items in a group. * * @group agg_funcs + * @since 1.3.0 */ def count(columnName: String): Column = count(Column(columnName)) @@ -160,6 +172,7 @@ object functions { * Aggregate function: returns the number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ @scala.annotation.varargs def countDistinct(expr: Column, exprs: Column*): Column = @@ -169,6 +182,7 @@ object functions { * Aggregate function: returns the number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ @scala.annotation.varargs def countDistinct(columnName: String, columnNames: String*): Column = @@ -178,6 +192,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(e: Column): Column = ApproxCountDistinct(e.expr) @@ -185,6 +200,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName)) @@ -192,6 +208,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd) @@ -199,6 +216,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(columnName: String, rsd: Double): Column = { approxCountDistinct(Column(columnName), rsd) @@ -208,6 +226,7 @@ object functions { * Aggregate function: returns the average of the values in a group. * * @group agg_funcs + * @since 1.3.0 */ def avg(e: Column): Column = Average(e.expr) @@ -215,6 +234,7 @@ object functions { * Aggregate function: returns the average of the values in a group. * * @group agg_funcs + * @since 1.3.0 */ def avg(columnName: String): Column = avg(Column(columnName)) @@ -222,6 +242,7 @@ object functions { * Aggregate function: returns the first value in a group. * * @group agg_funcs + * @since 1.3.0 */ def first(e: Column): Column = First(e.expr) @@ -229,6 +250,7 @@ object functions { * Aggregate function: returns the first value of a column in a group. * * @group agg_funcs + * @since 1.3.0 */ def first(columnName: String): Column = first(Column(columnName)) @@ -236,6 +258,7 @@ object functions { * Aggregate function: returns the last value in a group. * * @group agg_funcs + * @since 1.3.0 */ def last(e: Column): Column = Last(e.expr) @@ -243,6 +266,7 @@ object functions { * Aggregate function: returns the last value of the column in a group. * * @group agg_funcs + * @since 1.3.0 */ def last(columnName: String): Column = last(Column(columnName)) @@ -251,6 +275,7 @@ object functions { * Alias for avg. 
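The distinct and approximate aggregates above can be combined in a single grouped aggregation, for example (columns are illustrative):

{{{
import org.apache.spark.sql.functions._

df.groupBy("department").agg(
  countDistinct("name"),
  approxCountDistinct("name", 0.05),   // 5% relative standard deviation
  first("name"),
  avg("expense"))
}}}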
* * @group agg_funcs + * @since 1.4.0 */ def mean(e: Column): Column = avg(e) @@ -259,6 +284,7 @@ object functions { * Alias for avg. * * @group agg_funcs + * @since 1.4.0 */ def mean(columnName: String): Column = avg(columnName) @@ -266,6 +292,7 @@ object functions { * Aggregate function: returns the minimum value of the expression in a group. * * @group agg_funcs + * @since 1.3.0 */ def min(e: Column): Column = Min(e.expr) @@ -273,6 +300,7 @@ object functions { * Aggregate function: returns the minimum value of the column in a group. * * @group agg_funcs + * @since 1.3.0 */ def min(columnName: String): Column = min(Column(columnName)) @@ -280,6 +308,7 @@ object functions { * Aggregate function: returns the maximum value of the expression in a group. * * @group agg_funcs + * @since 1.3.0 */ def max(e: Column): Column = Max(e.expr) @@ -287,6 +316,7 @@ object functions { * Aggregate function: returns the maximum value of the column in a group. * * @group agg_funcs + * @since 1.3.0 */ def max(columnName: String): Column = max(Column(columnName)) @@ -298,6 +328,7 @@ object functions { * Computes the absolute value. * * @group normal_funcs + * @since 1.3.0 */ def abs(e: Column): Column = Abs(e.expr) @@ -305,6 +336,7 @@ object functions { * Creates a new array column. The input columns must all have the same data type. * * @group normal_funcs + * @since 1.4.0 */ @scala.annotation.varargs def array(cols: Column*): Column = CreateArray(cols.map(_.expr)) @@ -313,6 +345,7 @@ object functions { * Creates a new array column. The input columns must all have the same data type. * * @group normal_funcs + * @since 1.4.0 */ def array(colName: String, colNames: String*): Column = { array((colName +: colNames).map(col) : _*) @@ -325,6 +358,7 @@ object functions { * }}} * * @group normal_funcs + * @since 1.3.0 */ @scala.annotation.varargs def coalesce(e: Column*): Column = Coalesce(e.map(_.expr)) @@ -333,6 +367,7 @@ object functions { * Converts a string exprsesion to lower case. * * @group normal_funcs + * @since 1.3.0 */ def lower(e: Column): Column = Lower(e.expr) @@ -349,6 +384,7 @@ object functions { * 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594. * * @group normal_funcs + * @since 1.4.0 */ def monotonicallyIncreasingId(): Column = execution.expressions.MonotonicallyIncreasingID() @@ -364,6 +400,7 @@ object functions { * }}} * * @group normal_funcs + * @since 1.3.0 */ def negate(e: Column): Column = -e @@ -378,6 +415,7 @@ object functions { * }}} * * @group normal_funcs + * @since 1.3.0 */ def not(e: Column): Column = !e @@ -385,6 +423,7 @@ object functions { * Generate a random column with i.i.d. samples from U[0.0, 1.0]. * * @group normal_funcs + * @since 1.4.0 */ def rand(seed: Long): Column = Rand(seed) @@ -392,6 +431,7 @@ object functions { * Generate a random column with i.i.d. samples from U[0.0, 1.0]. * * @group normal_funcs + * @since 1.4.0 */ def rand(): Column = rand(Utils.random.nextLong) @@ -399,6 +439,7 @@ object functions { * Generate a column with i.i.d. samples from the standard normal distribution. * * @group normal_funcs + * @since 1.4.0 */ def randn(seed: Long): Column = Randn(seed) @@ -406,6 +447,7 @@ object functions { * Generate a column with i.i.d. samples from the standard normal distribution. * * @group normal_funcs + * @since 1.4.0 */ def randn(): Column = randn(Utils.random.nextLong) @@ -415,6 +457,7 @@ object functions { * Note that this is indeterministic because it depends on data partitioning and task scheduling. 
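A brief sketch of the non-aggregate helpers documented above; the nickname column is hypothetical and the seed is arbitrary:

{{{
import org.apache.spark.sql.functions._

df.select(
  lower(col("name")),
  coalesce(col("nickname"), col("name")),
  monotonicallyIncreasingId().as("row_id"),
  rand(42))
}}}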
* * @group normal_funcs + * @since 1.4.0 */ def sparkPartitionId(): Column = execution.expressions.SparkPartitionID @@ -422,6 +465,7 @@ object functions { * Computes the square root of the specified float value. * * @group normal_funcs + * @since 1.3.0 */ def sqrt(e: Column): Column = Sqrt(e.expr) @@ -430,6 +474,7 @@ object functions { * a derived column expression that is named (i.e. aliased). * * @group normal_funcs + * @since 1.4.0 */ @scala.annotation.varargs def struct(cols: Column*): Column = { @@ -442,6 +487,7 @@ object functions { * Creates a new struct column that composes multiple input columns. * * @group normal_funcs + * @since 1.4.0 */ def struct(colName: String, colNames: String*): Column = { struct((colName +: colNames).map(col) : _*) @@ -451,14 +497,15 @@ object functions { * Converts a string expression to upper case. * * @group normal_funcs + * @since 1.3.0 */ def upper(e: Column): Column = Upper(e.expr) - /** * Computes bitwise NOT. * * @group normal_funcs + * @since 1.4.0 */ def bitwiseNOT(e: Column): Column = BitwiseNot(e.expr) @@ -471,6 +518,7 @@ object functions { * 0.0 through pi. * * @group math_funcs + * @since 1.4.0 */ def acos(e: Column): Column = Acos(e.expr) @@ -479,6 +527,7 @@ object functions { * 0.0 through pi. * * @group math_funcs + * @since 1.4.0 */ def acos(columnName: String): Column = acos(Column(columnName)) @@ -487,6 +536,7 @@ object functions { * -pi/2 through pi/2. * * @group math_funcs + * @since 1.4.0 */ def asin(e: Column): Column = Asin(e.expr) @@ -495,6 +545,7 @@ object functions { * -pi/2 through pi/2. * * @group math_funcs + * @since 1.4.0 */ def asin(columnName: String): Column = asin(Column(columnName)) @@ -502,6 +553,7 @@ object functions { * Computes the tangent inverse of the given value. * * @group math_funcs + * @since 1.4.0 */ def atan(e: Column): Column = Atan(e.expr) @@ -509,6 +561,7 @@ object functions { * Computes the tangent inverse of the given column. * * @group math_funcs + * @since 1.4.0 */ def atan(columnName: String): Column = atan(Column(columnName)) @@ -517,6 +570,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(l: Column, r: Column): Column = Atan2(l.expr, r.expr) @@ -525,6 +579,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(l: Column, rightName: String): Column = atan2(l, Column(rightName)) @@ -533,6 +588,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(leftName: String, r: Column): Column = atan2(Column(leftName), r) @@ -541,6 +597,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(leftName: String, rightName: String): Column = atan2(Column(leftName), Column(rightName)) @@ -550,6 +607,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(l: Column, r: Double): Column = atan2(l, lit(r).expr) @@ -558,6 +616,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(leftName: String, r: Double): Column = atan2(Column(leftName), r) @@ -566,6 +625,7 @@ object functions { * polar coordinates (r, theta). * * @group math_funcs + * @since 1.4.0 */ def atan2(l: Double, r: Column): Column = atan2(lit(l).expr, r) @@ -574,6 +634,7 @@ object functions { * polar coordinates (r, theta). 
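And a similar sketch for struct, upper, and sqrt as tagged above (columns illustrative):

{{{
import org.apache.spark.sql.functions._

df.select(
  struct(col("name"), col("age")).as("person"),
  upper(col("name")),
  sqrt(col("expense")))
}}}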
* * @group math_funcs + * @since 1.4.0 */ def atan2(l: Double, rightName: String): Column = atan2(l, Column(rightName)) @@ -581,6 +642,7 @@ object functions { * Computes the cube-root of the given value. * * @group math_funcs + * @since 1.4.0 */ def cbrt(e: Column): Column = Cbrt(e.expr) @@ -588,6 +650,7 @@ object functions { * Computes the cube-root of the given column. * * @group math_funcs + * @since 1.4.0 */ def cbrt(columnName: String): Column = cbrt(Column(columnName)) @@ -595,6 +658,7 @@ object functions { * Computes the ceiling of the given value. * * @group math_funcs + * @since 1.4.0 */ def ceil(e: Column): Column = Ceil(e.expr) @@ -602,6 +666,7 @@ object functions { * Computes the ceiling of the given column. * * @group math_funcs + * @since 1.4.0 */ def ceil(columnName: String): Column = ceil(Column(columnName)) @@ -609,6 +674,7 @@ object functions { * Computes the cosine of the given value. * * @group math_funcs + * @since 1.4.0 */ def cos(e: Column): Column = Cos(e.expr) @@ -616,6 +682,7 @@ object functions { * Computes the cosine of the given column. * * @group math_funcs + * @since 1.4.0 */ def cos(columnName: String): Column = cos(Column(columnName)) @@ -623,6 +690,7 @@ object functions { * Computes the hyperbolic cosine of the given value. * * @group math_funcs + * @since 1.4.0 */ def cosh(e: Column): Column = Cosh(e.expr) @@ -630,6 +698,7 @@ object functions { * Computes the hyperbolic cosine of the given column. * * @group math_funcs + * @since 1.4.0 */ def cosh(columnName: String): Column = cosh(Column(columnName)) @@ -637,6 +706,7 @@ object functions { * Computes the exponential of the given value. * * @group math_funcs + * @since 1.4.0 */ def exp(e: Column): Column = Exp(e.expr) @@ -644,6 +714,7 @@ object functions { * Computes the exponential of the given column. * * @group math_funcs + * @since 1.4.0 */ def exp(columnName: String): Column = exp(Column(columnName)) @@ -651,6 +722,7 @@ object functions { * Computes the exponential of the given value minus one. * * @group math_funcs + * @since 1.4.0 */ def expm1(e: Column): Column = Expm1(e.expr) @@ -658,6 +730,7 @@ object functions { * Computes the exponential of the given column. * * @group math_funcs + * @since 1.4.0 */ def expm1(columnName: String): Column = expm1(Column(columnName)) @@ -665,6 +738,7 @@ object functions { * Computes the floor of the given value. * * @group math_funcs + * @since 1.4.0 */ def floor(e: Column): Column = Floor(e.expr) @@ -672,6 +746,7 @@ object functions { * Computes the floor of the given column. * * @group math_funcs + * @since 1.4.0 */ def floor(columnName: String): Column = floor(Column(columnName)) @@ -679,6 +754,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Column, r: Column): Column = Hypot(l.expr, r.expr) @@ -686,6 +762,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Column, rightName: String): Column = hypot(l, Column(rightName)) @@ -693,6 +770,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(leftName: String, r: Column): Column = hypot(Column(leftName), r) @@ -700,6 +778,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. 
* * @group math_funcs + * @since 1.4.0 */ def hypot(leftName: String, rightName: String): Column = hypot(Column(leftName), Column(rightName)) @@ -708,6 +787,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Column, r: Double): Column = hypot(l, lit(r).expr) @@ -715,6 +795,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(leftName: String, r: Double): Column = hypot(Column(leftName), r) @@ -722,6 +803,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Double, r: Column): Column = hypot(lit(l).expr, r) @@ -729,6 +811,7 @@ object functions { * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. * * @group math_funcs + * @since 1.4.0 */ def hypot(l: Double, rightName: String): Column = hypot(l, Column(rightName)) @@ -736,6 +819,7 @@ object functions { * Computes the natural logarithm of the given value. * * @group math_funcs + * @since 1.4.0 */ def log(e: Column): Column = Log(e.expr) @@ -743,6 +827,7 @@ object functions { * Computes the natural logarithm of the given column. * * @group math_funcs + * @since 1.4.0 */ def log(columnName: String): Column = log(Column(columnName)) @@ -750,6 +835,7 @@ object functions { * Computes the logarithm of the given value in Base 10. * * @group math_funcs + * @since 1.4.0 */ def log10(e: Column): Column = Log10(e.expr) @@ -757,6 +843,7 @@ object functions { * Computes the logarithm of the given value in Base 10. * * @group math_funcs + * @since 1.4.0 */ def log10(columnName: String): Column = log10(Column(columnName)) @@ -764,6 +851,7 @@ object functions { * Computes the natural logarithm of the given value plus one. * * @group math_funcs + * @since 1.4.0 */ def log1p(e: Column): Column = Log1p(e.expr) @@ -771,6 +859,7 @@ object functions { * Computes the natural logarithm of the given column plus one. * * @group math_funcs + * @since 1.4.0 */ def log1p(columnName: String): Column = log1p(Column(columnName)) @@ -778,6 +867,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Column, r: Column): Column = Pow(l.expr, r.expr) @@ -785,6 +875,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Column, rightName: String): Column = pow(l, Column(rightName)) @@ -792,6 +883,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(leftName: String, r: Column): Column = pow(Column(leftName), r) @@ -799,6 +891,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(leftName: String, rightName: String): Column = pow(Column(leftName), Column(rightName)) @@ -806,6 +899,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Column, r: Double): Column = pow(l, lit(r).expr) @@ -813,6 +907,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. 
* * @group math_funcs + * @since 1.4.0 */ def pow(leftName: String, r: Double): Column = pow(Column(leftName), r) @@ -820,6 +915,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Double, r: Column): Column = pow(lit(l).expr, r) @@ -827,6 +923,7 @@ object functions { * Returns the value of the first argument raised to the power of the second argument. * * @group math_funcs + * @since 1.4.0 */ def pow(l: Double, rightName: String): Column = pow(l, Column(rightName)) @@ -835,6 +932,7 @@ object functions { * is equal to a mathematical integer. * * @group math_funcs + * @since 1.4.0 */ def rint(e: Column): Column = Rint(e.expr) @@ -843,6 +941,7 @@ object functions { * is equal to a mathematical integer. * * @group math_funcs + * @since 1.4.0 */ def rint(columnName: String): Column = rint(Column(columnName)) @@ -850,6 +949,7 @@ object functions { * Computes the signum of the given value. * * @group math_funcs + * @since 1.4.0 */ def signum(e: Column): Column = Signum(e.expr) @@ -857,6 +957,7 @@ object functions { * Computes the signum of the given column. * * @group math_funcs + * @since 1.4.0 */ def signum(columnName: String): Column = signum(Column(columnName)) @@ -864,6 +965,7 @@ object functions { * Computes the sine of the given value. * * @group math_funcs + * @since 1.4.0 */ def sin(e: Column): Column = Sin(e.expr) @@ -871,6 +973,7 @@ object functions { * Computes the sine of the given column. * * @group math_funcs + * @since 1.4.0 */ def sin(columnName: String): Column = sin(Column(columnName)) @@ -878,6 +981,7 @@ object functions { * Computes the hyperbolic sine of the given value. * * @group math_funcs + * @since 1.4.0 */ def sinh(e: Column): Column = Sinh(e.expr) @@ -885,6 +989,7 @@ object functions { * Computes the hyperbolic sine of the given column. * * @group math_funcs + * @since 1.4.0 */ def sinh(columnName: String): Column = sinh(Column(columnName)) @@ -892,6 +997,7 @@ object functions { * Computes the tangent of the given value. * * @group math_funcs + * @since 1.4.0 */ def tan(e: Column): Column = Tan(e.expr) @@ -899,6 +1005,7 @@ object functions { * Computes the tangent of the given column. * * @group math_funcs + * @since 1.4.0 */ def tan(columnName: String): Column = tan(Column(columnName)) @@ -906,6 +1013,7 @@ object functions { * Computes the hyperbolic tangent of the given value. * * @group math_funcs + * @since 1.4.0 */ def tanh(e: Column): Column = Tanh(e.expr) @@ -913,6 +1021,7 @@ object functions { * Computes the hyperbolic tangent of the given column. * * @group math_funcs + * @since 1.4.0 */ def tanh(columnName: String): Column = tanh(Column(columnName)) @@ -920,6 +1029,7 @@ object functions { * Converts an angle measured in radians to an approximately equivalent angle measured in degrees. * * @group math_funcs + * @since 1.4.0 */ def toDegrees(e: Column): Column = ToDegrees(e.expr) @@ -927,6 +1037,7 @@ object functions { * Converts an angle measured in radians to an approximately equivalent angle measured in degrees. * * @group math_funcs + * @since 1.4.0 */ def toDegrees(columnName: String): Column = toDegrees(Column(columnName)) @@ -934,6 +1045,7 @@ object functions { * Converts an angle measured in degrees to an approximately equivalent angle measured in radians. 
* * @group math_funcs + * @since 1.4.0 */ def toRadians(e: Column): Column = ToRadians(e.expr) @@ -941,6 +1053,7 @@ object functions { * Converts an angle measured in degrees to an approximately equivalent angle measured in radians. * * @group math_funcs + * @since 1.4.0 */ def toRadians(columnName: String): Column = toRadians(Column(columnName)) @@ -960,6 +1073,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -976,6 +1090,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function$x[$fTypes], returnType: DataType${if (args.length > 0) ", " + args else ""}): Column = { ScalaUdf(f, returnType, Seq($argsInUdf)) @@ -988,6 +1103,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag](f: Function0[RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -998,6 +1114,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag](f: Function1[A1, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1008,6 +1125,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag](f: Function2[A1, A2, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1018,6 +1136,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](f: Function3[A1, A2, A3, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1028,6 +1147,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](f: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1038,6 +1158,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](f: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1048,6 +1169,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](f: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1058,6 +1180,7 @@ object functions { * The data types are automatically inferred based on the function's signature. 
* * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](f: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1068,6 +1191,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](f: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1078,6 +1202,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](f: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1088,6 +1213,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](f: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -1100,6 +1226,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function0[_], returnType: DataType): Column = { ScalaUdf(f, returnType, Seq()) @@ -1110,6 +1237,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function1[_, _], returnType: DataType, arg1: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr)) @@ -1120,6 +1248,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function2[_, _, _], returnType: DataType, arg1: Column, arg2: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr)) @@ -1130,6 +1259,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function3[_, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr)) @@ -1140,6 +1270,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function4[_, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr)) @@ -1150,6 +1281,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function5[_, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr)) @@ -1160,6 +1292,7 @@ object functions { * you to specify the return data type. 
* * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function6[_, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr)) @@ -1170,6 +1303,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function7[_, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr)) @@ -1180,6 +1314,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function8[_, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr)) @@ -1190,6 +1325,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function9[_, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr)) @@ -1200,6 +1336,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column, arg10: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr, arg10.expr)) @@ -1220,6 +1357,7 @@ object functions { * }}} * * @group udf_funcs + * @since 1.4.0 */ def callUdf(udfName: String, cols: Column*): Column = { UnresolvedFunction(udfName, cols.map(_.expr)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala index 791046e0079d6..24e86ca415c51 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -19,83 +19,113 @@ package org.apache.spark.sql.sources /** * A filter predicate for data sources. + * + * @since 1.3.0 */ abstract class Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * equal to `value`. + * + * @since 1.3.0 */ case class EqualTo(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * greater than `value`. + * + * @since 1.3.0 */ case class GreaterThan(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * greater than or equal to `value`. + * + * @since 1.3.0 */ case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * less than `value`. 
+ * + * @since 1.3.0 */ case class LessThan(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * less than or equal to `value`. + * + * @since 1.3.0 */ case class LessThanOrEqual(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to one of the values in the array. + * + * @since 1.3.0 */ case class In(attribute: String, values: Array[Any]) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to null. + * + * @since 1.3.0 */ case class IsNull(attribute: String) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a non-null value. + * + * @since 1.3.0 */ case class IsNotNull(attribute: String) extends Filter /** * A filter that evaluates to `true` iff both `left` or `right` evaluate to `true`. + * + * @since 1.3.0 */ case class And(left: Filter, right: Filter) extends Filter /** * A filter that evaluates to `true` iff at least one of `left` or `right` evaluates to `true`. + * + * @since 1.3.0 */ case class Or(left: Filter, right: Filter) extends Filter /** * A filter that evaluates to `true` iff `child` is evaluated to `false`. + * + * @since 1.3.0 */ case class Not(child: Filter) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to * a string that starts with `value`. + * + * @since 1.3.1 */ case class StringStartsWith(attribute: String, value: String) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to * a string that starts with `value`. + * + * @since 1.3.1 */ case class StringEndsWith(attribute: String, value: String) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to * a string that contains the string `value`. + * + * @since 1.3.1 */ case class StringContains(attribute: String, value: String) extends Filter diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 5e010d21120f6..6f315305c11d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -43,6 +43,8 @@ import org.apache.spark.sql.types.{StructField, StructType} * data source 'org.apache.spark.sql.json.DefaultSource' * * A new instance of this class with be instantiated each time a DDL call is made. + * + * @since 1.3.0 */ @DeveloperApi trait RelationProvider { @@ -72,6 +74,8 @@ trait RelationProvider { * users need to provide a schema when using a SchemaRelationProvider. * A relation provider can inherits both [[RelationProvider]] and [[SchemaRelationProvider]] * if it can support both schema inference and user-specified schemas. + * + * @since 1.3.0 */ @DeveloperApi trait SchemaRelationProvider { @@ -106,6 +110,8 @@ trait SchemaRelationProvider { * using a SchemaRelationProvider. A relation provider can inherits both [[RelationProvider]], * and [[FSBasedRelationProvider]] if it can support schema inference, user-specified * schemas, and accessing partitioned relations. 
+ * + * @since 1.4.0 */ trait FSBasedRelationProvider { /** @@ -121,6 +127,9 @@ trait FSBasedRelationProvider { parameters: Map[String, String]): FSBasedRelation } +/** + * @since 1.3.0 + */ @DeveloperApi trait CreatableRelationProvider { /** @@ -134,6 +143,8 @@ trait CreatableRelationProvider { * existing data is expected to be overwritten by the contents of the DataFrame. * ErrorIfExists mode means that when saving a DataFrame to a data source, * if data already exists, an exception is expected to be thrown. + * + * @since 1.3.0 */ def createRelation( sqlContext: SQLContext, @@ -152,6 +163,8 @@ trait CreatableRelationProvider { * BaseRelations must also define a equality function that only returns true when the two * instances will return the same data. This equality function is used when determining when * it is safe to substitute cached results for a given relation. + * + * @since 1.3.0 */ @DeveloperApi abstract class BaseRelation { @@ -167,6 +180,8 @@ abstract class BaseRelation { * * Note that it is always better to overestimate size than underestimate, because underestimation * could lead to execution plans that are suboptimal (i.e. broadcasting a very large table). + * + * @since 1.3.0 */ def sizeInBytes: Long = sqlContext.conf.defaultSizeInBytes @@ -177,6 +192,8 @@ abstract class BaseRelation { * * Note: The internal representation is not stable across releases and thus data sources outside * of Spark SQL should leave this as true. + * + * @since 1.4.0 */ def needConversion: Boolean = true } @@ -184,6 +201,8 @@ abstract class BaseRelation { /** * ::DeveloperApi:: * A BaseRelation that can produce all of its tuples as an RDD of Row objects. + * + * @since 1.3.0 */ @DeveloperApi trait TableScan { @@ -194,6 +213,8 @@ trait TableScan { * ::DeveloperApi:: * A BaseRelation that can eliminate unneeded columns before producing an RDD * containing all of its tuples as Row objects. + * + * @since 1.3.0 */ @DeveloperApi trait PrunedScan { @@ -211,6 +232,8 @@ trait PrunedScan { * The pushed down filters are currently purely an optimization as they will all be evaluated * again. This means it is safe to use them with methods that produce false positives such * as filtering partitions based on a bloom filter. + * + * @since 1.3.0 */ @DeveloperApi trait PrunedFilteredScan { @@ -232,6 +255,8 @@ trait PrunedFilteredScan { * 3. It assumes that fields of the data provided in the insert method are nullable. * If a data source needs to check the actual nullability of a field, it needs to do it in the * insert method. + * + * @since 1.3.0 */ @DeveloperApi trait InsertableRelation { @@ -245,6 +270,8 @@ trait InsertableRelation { * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. Unlike the other APIs this * interface is NOT designed to be binary compatible across releases and thus should only be used * for experimentation. + * + * @since 1.3.0 */ @Experimental trait CatalystScan { @@ -257,6 +284,8 @@ trait CatalystScan { * underlying file system. Subclasses of [[OutputWriter]] must provide a zero-argument constructor. * An [[OutputWriter]] instance is created and initialized when a new output file is opened on * executor side. This instance is used to persist rows to this single output file. + * + * @since 1.4.0 */ @Experimental abstract class OutputWriter { @@ -270,6 +299,8 @@ abstract class OutputWriter { * @param dataSchema Schema of the rows to be written. Partition columns are not included in the * schema if the corresponding relation is partitioned. 
* @param context The Hadoop MapReduce task context. + * + * @since 1.4.0 */ def init( path: String, @@ -279,12 +310,16 @@ abstract class OutputWriter { /** * Persists a single row. Invoked on the executor side. When writing to dynamically partitioned * tables, dynamic partition columns are not included in rows to be written. + * + * @since 1.4.0 */ def write(row: Row): Unit /** * Closes the [[OutputWriter]]. Invoked on the executor side after all rows are persisted, before * the task output is committed. + * + * @since 1.4.0 */ def close(): Unit } @@ -310,6 +345,8 @@ abstract class OutputWriter { * directories of all partition directories. * @param maybePartitionSpec An [[FSBasedRelation]] can be created with an optional * [[PartitionSpec]], so that partition discovery can be skipped. + * + * @since 1.4.0 */ @Experimental abstract class FSBasedRelation private[sql]( @@ -323,6 +360,8 @@ abstract class FSBasedRelation private[sql]( * @param paths Base paths of this relation. For partitioned relations, it should be either root * directories of all partition directories. * @param partitionColumns Partition columns of this relation. + * + * @since 1.4.0 */ def this(paths: Array[String], partitionColumns: StructType) = this(paths, { @@ -335,6 +374,8 @@ abstract class FSBasedRelation private[sql]( * * @param paths Base paths of this relation. For partitioned relations, it should be root * directories of all partition directories. + * + * @since 1.4.0 */ def this(paths: Array[String]) = this(paths, None) @@ -356,6 +397,8 @@ abstract class FSBasedRelation private[sql]( /** * Partition columns. Note that they are always nullable. + * + * @since 1.4.0 */ def partitionColumns: StructType = partitionSpec.partitionColumns @@ -385,6 +428,8 @@ abstract class FSBasedRelation private[sql]( /** * Schema of this relation. It consists of columns appearing in [[dataSchema]] and all partition * columns not appearing in [[dataSchema]]. + * + * @since 1.4.0 */ override lazy val schema: StructType = { val dataSchemaColumnNames = dataSchema.map(_.name.toLowerCase).toSet @@ -396,6 +441,8 @@ abstract class FSBasedRelation private[sql]( /** * Specifies schema of actual data files. For partitioned relations, if one or more partitioned * columns are contained in the data files, they should also appear in `dataSchema`. + * + * @since 1.4.0 */ def dataSchema: StructType @@ -407,6 +454,8 @@ abstract class FSBasedRelation private[sql]( * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the * relation. For a partitioned relation, it contains paths of all data files in a single * selected partition. + * + * @since 1.4.0 */ def buildScan(inputPaths: Array[String]): RDD[Row] = { throw new RuntimeException( @@ -422,6 +471,8 @@ abstract class FSBasedRelation private[sql]( * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the * relation. For a partitioned relation, it contains paths of all data files in a single * selected partition. + * + * @since 1.4.0 */ def buildScan(requiredColumns: Array[String], inputPaths: Array[String]): RDD[Row] = { // Yeah, to workaround serialization... @@ -458,6 +509,8 @@ abstract class FSBasedRelation private[sql]( * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the * relation. For a partitioned relation, it contains paths of all data files in a single * selected partition. 
+ * + * @since 1.4.0 */ def buildScan( requiredColumns: Array[String], @@ -473,12 +526,16 @@ abstract class FSBasedRelation private[sql]( * Note that the only side effect expected here is mutating `job` via its setters. Especially, * Spark SQL caches [[BaseRelation]] instances for performance, mutating relation internal states * may cause unexpected behaviors. + * + * @since 1.4.0 */ def prepareForWrite(job: Job): Unit = () /** * This method is responsible for producing a new [[OutputWriter]] for each newly opened output * file on the executor side. + * + * @since 1.4.0 */ def outputWriterClass: Class[_ <: OutputWriter] } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 61e8c154e8c3c..766c42d040f80 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -63,6 +63,8 @@ private[hive] class HiveQLDialect extends ParserDialect { /** * An instance of the Spark SQL execution engine that integrates with data stored in Hive. * Configuration for Hive is read from hive-site.xml on the classpath. + * + * @since 1.0.0 */ class HiveContext(sc: SparkContext) extends SQLContext(sc) { self => @@ -225,6 +227,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { * Spark SQL or the external data source library it uses might cache certain metadata about a * table, such as the location of blocks. When those change outside of Spark SQL, users should * call this function to invalidate the cache. + * + * @since 1.3.0 */ def refreshTable(tableName: String): Unit = { // TODO: Database support... @@ -242,6 +246,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { * * Right now, it only supports Hive tables and it only updates the size of a Hive table * in the Hive metastore. + * + * @since 1.2.0 */ @Experimental def analyze(tableName: String) { From 97dee313f23b00f15638cb72a4a80c1f197f8a9d Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Tue, 12 May 2015 21:43:34 -0700 Subject: [PATCH 132/320] [SPARK-7321][SQL] Add Column expression for conditional statements (when/otherwise) This builds on https://github.com/apache/spark/pull/5932 and should close https://github.com/apache/spark/pull/5932 as well. As an example: ```python df.select(when(df['age'] == 2, 3).otherwise(4).alias("age")).collect() ``` Author: Reynold Xin <rxin@databricks.com> Author: kaka1992 <kaka_1992@163.com> Closes #6072 from rxin/when-expr and squashes the following commits: 8f49201 [Reynold Xin] Throw exception if otherwise is applied twice. 0455eda [Reynold Xin] Reset run-tests. bfb9d9f [Reynold Xin] Updated documentation and test cases. 
762f6a5 [Reynold Xin] Merge pull request #5932 from kaka1992/IFCASE 95724c6 [kaka1992] Update 8218d0a [kaka1992] Update 801009e [kaka1992] Update 76d6346 [kaka1992] [SPARK-7321][SQL] Add Column expression for conditional statements (if, case) --- python/pyspark/sql/__init__.py | 2 + python/pyspark/sql/dataframe.py | 31 ++++++++++ python/pyspark/sql/functions.py | 26 +++++++- .../scala/org/apache/spark/sql/Column.scala | 61 +++++++++++++++++++ .../org/apache/spark/sql/functions.scala | 24 ++++++++ .../spark/sql/ColumnExpressionSuite.scala | 21 +++++++ 6 files changed, 163 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py index b60b991dd4d8b..7192c89b3dc7f 100644 --- a/python/pyspark/sql/__init__.py +++ b/python/pyspark/sql/__init__.py @@ -32,6 +32,8 @@ Aggregation methods, returned by :func:`DataFrame.groupBy`. - L{DataFrameNaFunctions} Methods for handling missing data (null values). + - L{DataFrameStatFunctions} + Methods for statistics functionality. - L{functions} List of built-in functions available for :class:`DataFrame`. - L{types} diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 078acfdf7e2df..82cb1c2fdbf94 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1546,6 +1546,37 @@ def between(self, lowerBound, upperBound): """ return (self >= lowerBound) & (self <= upperBound) + @ignore_unicode_prefix + def when(self, condition, value): + """Evaluates a list of conditions and returns one of multiple possible result expressions. + If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. + + See :func:`pyspark.sql.functions.when` for example usage. + + :param condition: a boolean :class:`Column` expression. + :param value: a literal value, or a :class:`Column` expression. + + """ + sc = SparkContext._active_spark_context + if not isinstance(condition, Column): + raise TypeError("condition should be a Column") + v = value._jc if isinstance(value, Column) else value + jc = sc._jvm.functions.when(condition._jc, v) + return Column(jc) + + @ignore_unicode_prefix + def otherwise(self, value): + """Evaluates a list of conditions and returns one of multiple possible result expressions. + If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. + + See :func:`pyspark.sql.functions.when` for example usage. + + :param value: a literal value, or a :class:`Column` expression. + """ + v = value._jc if isinstance(value, Column) else value + jc = self._jc.otherwise(value) + return Column(jc) + def __repr__(self): return 'Column<%s>' % self._jc.toString().encode('utf8') diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 38a043a3c59d7..d91265ee0bec8 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -32,13 +32,14 @@ __all__ = [ 'approxCountDistinct', + 'coalesce', 'countDistinct', 'monotonicallyIncreasingId', 'rand', 'randn', 'sparkPartitionId', - 'coalesce', - 'udf'] + 'udf', + 'when'] def _create_function(name, doc=""): @@ -291,6 +292,27 @@ def struct(*cols): return Column(jc) +def when(condition, value): + """Evaluates a list of conditions and returns one of multiple possible result expressions. + If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. + + :param condition: a boolean :class:`Column` expression. + :param value: a literal value, or a :class:`Column` expression. 
+ + >>> df.select(when(df['age'] == 2, 3).otherwise(4).alias("age")).collect() + [Row(age=3), Row(age=4)] + + >>> df.select(when(df.age == 2, df.age + 1).alias("age")).collect() + [Row(age=3), Row(age=None)] + """ + sc = SparkContext._active_spark_context + if not isinstance(condition, Column): + raise TypeError("condition should be a Column") + v = value._jc if isinstance(value, Column) else value + jc = sc._jvm.functions.when(condition._jc, v) + return Column(jc) + + class UserDefinedFunction(object): """ User defined function in Python diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 4773dedf72117..42f5bcda49cfb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -327,6 +327,67 @@ class Column(protected[sql] val expr: Expression) extends Logging { */ def eqNullSafe(other: Any): Column = this <=> other + /** + * Evaluates a list of conditions and returns one of multiple possible result expressions. + * If otherwise is not defined at the end, null is returned for unmatched conditions. + * + * {{{ + * // Example: encoding gender string column into integer. + * + * // Scala: + * people.select(when(people("gender") === "male", 0) + * .when(people("gender") === "female", 1) + * .otherwise(2)) + * + * // Java: + * people.select(when(col("gender").equalTo("male"), 0) + * .when(col("gender").equalTo("female"), 1) + * .otherwise(2)) + * }}} + * + * @group expr_ops + */ + def when(condition: Column, value: Any):Column = this.expr match { + case CaseWhen(branches: Seq[Expression]) => + CaseWhen(branches ++ Seq(lit(condition).expr, lit(value).expr)) + case _ => + throw new IllegalArgumentException( + "when() can only be applied on a Column previously generated by when() function") + } + + /** + * Evaluates a list of conditions and returns one of multiple possible result expressions. + * If otherwise is not defined at the end, null is returned for unmatched conditions. + * + * {{{ + * // Example: encoding gender string column into integer. + * + * // Scala: + * people.select(when(people("gender") === "male", 0) + * .when(people("gender") === "female", 1) + * .otherwise(2)) + * + * // Java: + * people.select(when(col("gender").equalTo("male"), 0) + * .when(col("gender").equalTo("female"), 1) + * .otherwise(2)) + * }}} + * + * @group expr_ops + */ + def otherwise(value: Any):Column = this.expr match { + case CaseWhen(branches: Seq[Expression]) => + if (branches.size % 2 == 0) { + CaseWhen(branches :+ lit(value).expr) + } else { + throw new IllegalArgumentException( + "otherwise() can only be applied once on a Column previously generated by when()") + } + case _ => + throw new IllegalArgumentException( + "otherwise() can only be applied on a Column previously generated by when()") + } + /** * True if the current column is between the lower bound and upper bound, inclusive. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 215787e40bf0a..099e1d8f03272 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -419,6 +419,30 @@ object functions { */ def not(e: Column): Column = !e + /** + * Evaluates a list of conditions and returns one of multiple possible result expressions. 
+ * If otherwise is not defined at the end, null is returned for unmatched conditions. + * + * {{{ + * // Example: encoding gender string column into integer. + * + * // Scala: + * people.select(when(people("gender") === "male", 0) + * .when(people("gender") === "female", 1) + * .otherwise(2)) + * + * // Java: + * people.select(when(col("gender").equalTo("male"), 0) + * .when(col("gender").equalTo("female"), 1) + * .otherwise(2)) + * }}} + * + * @group normal_funcs + */ + def when(condition: Column, value: Any): Column = { + CaseWhen(Seq(condition.expr, lit(value).expr)) + } + /** * Generate a random column with i.i.d. samples from U[0.0, 1.0]. * diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index d96186c268720..269e185543059 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -255,6 +255,27 @@ class ColumnExpressionSuite extends QueryTest { Row(false, true) :: Row(true, false) :: Row(true, true) :: Nil) } + test("SPARK-7321 when conditional statements") { + val testData = (1 to 3).map(i => (i, i.toString)).toDF("key", "value") + + checkAnswer( + testData.select(when($"key" === 1, -1).when($"key" === 2, -2).otherwise(0)), + Seq(Row(-1), Row(-2), Row(0)) + ) + + // Without the ending otherwise, return null for unmatched conditions. + // Also test putting a non-literal value in the expression. + checkAnswer( + testData.select(when($"key" === 1, lit(0) - $"key").when($"key" === 2, -2)), + Seq(Row(-1), Row(-2), Row(null)) + ) + + // Test error handling for invalid expressions. + intercept[IllegalArgumentException] { $"key".when($"key" === 1, -1) } + intercept[IllegalArgumentException] { $"key".otherwise(-1) } + intercept[IllegalArgumentException] { when($"key" === 1, -1).otherwise(-1).otherwise(-1) } + } + test("sqrt") { checkAnswer( testData.select(sqrt('key)).orderBy('key.asc), From 208b902257bbfb85bf8cadfc942b7134ad690f8b Mon Sep 17 00:00:00 2001 From: "Santiago M. Mola" <santi@mola.io> Date: Tue, 12 May 2015 23:44:21 -0700 Subject: [PATCH 133/320] [SPARK-7566][SQL] Add type to HiveContext.analyzer This makes HiveContext.analyzer overrideable. Author: Santiago M. Mola <santi@mola.io> Closes #6086 from smola/patch-3 and squashes the following commits: 8ece136 [Santiago M. Mola] [SPARK-7566][SQL] Add type to HiveContext.analyzer --- .../src/main/scala/org/apache/spark/sql/hive/HiveContext.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 766c42d040f80..9d98c36e947a1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -335,7 +335,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { /* An analyzer that uses the Hive metastore. 
*/ @transient - override protected[sql] lazy val analyzer = + override protected[sql] lazy val analyzer: Analyzer = new Analyzer(catalog, functionRegistry, conf) { override val extendedResolutionRules = catalog.ParquetConversions :: From df9b94a57cbd0e028228059d215b446d59d25ba8 Mon Sep 17 00:00:00 2001 From: Sun Rui <rui.sun@intel.com> Date: Tue, 12 May 2015 23:52:30 -0700 Subject: [PATCH 134/320] [SPARK-7482] [SPARKR] Rename some DataFrame API methods in SparkR to match their counterparts in Scala. Author: Sun Rui <rui.sun@intel.com> Closes #6007 from sun-rui/SPARK-7482 and squashes the following commits: 5c5cf5e [Sun Rui] Implement alias loadDF() as a new function. 3a30c10 [Sun Rui] Rename load()/save() to read.df()/write.df(). Also add loadDF()/saveDF() as aliases. 9f569d6 [Sun Rui] [SPARK-7482][SparkR] Rename some DataFrame API methods in SparkR to match their counterparts in Scala. --- R/pkg/NAMESPACE | 6 +++-- R/pkg/R/DataFrame.R | 35 +++++++++++++++++----------- R/pkg/R/RDD.R | 4 ++-- R/pkg/R/SQLContext.R | 13 ++++++++--- R/pkg/R/generics.R | 22 +++++++++++------- R/pkg/inst/tests/test_sparkSQL.R | 40 ++++++++++++++++---------------- 6 files changed, 71 insertions(+), 49 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 819e9a24e5c0e..ba29614e7b179 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -37,7 +37,7 @@ exportMethods("arrange", "registerTempTable", "rename", "repartition", - "sampleDF", + "sample", "sample_frac", "saveAsParquetFile", "saveAsTable", @@ -53,7 +53,8 @@ exportMethods("arrange", "unpersist", "where", "withColumn", - "withColumnRenamed") + "withColumnRenamed", + "write.df") exportClasses("Column") @@ -101,6 +102,7 @@ export("cacheTable", "jsonFile", "loadDF", "parquetFile", + "read.df", "sql", "table", "tableNames", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 2705817531019..a7fa32e291fb1 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -294,8 +294,8 @@ setMethod("registerTempTable", #'\dontrun{ #' sc <- sparkR.init() #' sqlCtx <- sparkRSQL.init(sc) -#' df <- loadDF(sqlCtx, path, "parquet") -#' df2 <- loadDF(sqlCtx, path2, "parquet") +#' df <- read.df(sqlCtx, path, "parquet") +#' df2 <- read.df(sqlCtx, path2, "parquet") #' registerTempTable(df, "table1") #' insertInto(df2, "table1", overwrite = TRUE) #'} @@ -473,14 +473,14 @@ setMethod("distinct", dataFrame(sdf) }) -#' SampleDF +#' Sample #' #' Return a sampled subset of this DataFrame using a random seed. 
#' #' @param x A SparkSQL DataFrame #' @param withReplacement Sampling with replacement or not #' @param fraction The (rough) sample target fraction -#' @rdname sampleDF +#' @rdname sample #' @aliases sample_frac #' @export #' @examples @@ -489,10 +489,10 @@ setMethod("distinct", #' sqlCtx <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- jsonFile(sqlCtx, path) -#' collect(sampleDF(df, FALSE, 0.5)) -#' collect(sampleDF(df, TRUE, 0.5)) +#' collect(sample(df, FALSE, 0.5)) +#' collect(sample(df, TRUE, 0.5)) #'} -setMethod("sampleDF", +setMethod("sample", # TODO : Figure out how to send integer as java.lang.Long to JVM so # we can send seed as an argument through callJMethod signature(x = "DataFrame", withReplacement = "logical", @@ -503,13 +503,13 @@ setMethod("sampleDF", dataFrame(sdf) }) -#' @rdname sampleDF -#' @aliases sampleDF +#' @rdname sample +#' @aliases sample setMethod("sample_frac", signature(x = "DataFrame", withReplacement = "logical", fraction = "numeric"), function(x, withReplacement, fraction) { - sampleDF(x, withReplacement, fraction) + sample(x, withReplacement, fraction) }) #' Count @@ -1303,7 +1303,7 @@ setMethod("except", #' @param source A name for external data source #' @param mode One of 'append', 'overwrite', 'error', 'ignore' #' -#' @rdname saveAsTable +#' @rdname write.df #' @export #' @examples #'\dontrun{ @@ -1311,9 +1311,9 @@ setMethod("except", #' sqlCtx <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- jsonFile(sqlCtx, path) -#' saveAsTable(df, "myfile") +#' write.df(df, "myfile", "parquet", "overwrite") #' } -setMethod("saveDF", +setMethod("write.df", signature(df = "DataFrame", path = 'character', source = 'character', mode = 'character'), function(df, path = NULL, source = NULL, mode = "append", ...){ @@ -1334,6 +1334,15 @@ setMethod("saveDF", callJMethod(df@sdf, "save", source, jmode, options) }) +#' @rdname write.df +#' @aliases saveDF +#' @export +setMethod("saveDF", + signature(df = "DataFrame", path = 'character', source = 'character', + mode = 'character'), + function(df, path = NULL, source = NULL, mode = "append", ...){ + write.df(df, path, source, mode, ...) + }) #' saveAsTable #' diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 9138629cac9c0..d3a68fff780ce 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -927,7 +927,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", MAXINT))))) # TODO(zongheng): investigate if this call is an in-place shuffle? - sample(samples)[1:total] + base::sample(samples)[1:total] }) # Creates tuples of the elements in this RDD by applying a function. 
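For context on the renaming in this commit: the SparkR wrappers delegate to the JVM-side DataFrame API (as the `callJMethod(df@sdf, "save", source, jmode, options)` call above shows), so the new names line up with the Scala methods of the same era. The following is a rough, hedged Scala sketch of those counterparts; the object and method names (`SparkRCounterparts`, `roundTrip`) and the example paths are illustrative assumptions, not part of the patch.

```scala
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}

object SparkRCounterparts {
  // Illustrative sketch only: mirrors the JVM calls behind the renamed
  // SparkR functions (read.df / sample / write.df). Paths are placeholders.
  def roundTrip(sqlContext: SQLContext): DataFrame = {
    // Counterpart of: df <- read.df(sqlCtx, "path/to/people.json", "json")
    val df: DataFrame = sqlContext.load("path/to/people.json", "json")

    // Counterpart of: sampled <- sample(df, FALSE, 0.5)
    val sampled = df.sample(withReplacement = false, fraction = 0.5)

    // Counterpart of: write.df(sampled, "path/to/people.parquet", "parquet", "overwrite")
    sampled.save("path/to/people.parquet", "parquet", SaveMode.Overwrite)

    sampled
  }
}
```

On the R side the mode string is first converted to a JVM `SaveMode` (the `jmode` value above) before `save` is invoked, which is why the sketch passes `SaveMode.Overwrite` directly.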
@@ -996,7 +996,7 @@ setMethod("coalesce", if (shuffle || numPartitions > SparkR:::numPartitions(x)) { func <- function(partIndex, part) { set.seed(partIndex) # partIndex as seed - start <- as.integer(sample(numPartitions, 1) - 1) + start <- as.integer(base::sample(numPartitions, 1) - 1) lapply(seq_along(part), function(i) { pos <- (start + i) %% numPartitions diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index cae06e6af2bff..531442e8459e4 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -421,7 +421,7 @@ clearCache <- function(sqlCtx) { #' \dontrun{ #' sc <- sparkR.init() #' sqlCtx <- sparkRSQL.init(sc) -#' df <- loadDF(sqlCtx, path, "parquet") +#' df <- read.df(sqlCtx, path, "parquet") #' registerTempTable(df, "table") #' dropTempTable(sqlCtx, "table") #' } @@ -450,10 +450,10 @@ dropTempTable <- function(sqlCtx, tableName) { #'\dontrun{ #' sc <- sparkR.init() #' sqlCtx <- sparkRSQL.init(sc) -#' df <- load(sqlCtx, "path/to/file.json", source = "json") +#' df <- read.df(sqlCtx, "path/to/file.json", source = "json") #' } -loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) { +read.df <- function(sqlCtx, path = NULL, source = NULL, ...) { options <- varargsToEnv(...) if (!is.null(path)) { options[['path']] <- path @@ -462,6 +462,13 @@ loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) { dataFrame(sdf) } +#' @aliases loadDF +#' @export + +loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) { + read.df(sqlCtx, path, source, ...) +} + #' Create an external table #' #' Creates an external table based on the dataset in a data source, diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 557128a419f19..6d2bfb1181e5a 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -456,19 +456,19 @@ setGeneric("rename", function(x, ...) { standardGeneric("rename") }) #' @export setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") }) -#' @rdname sampleDF +#' @rdname sample #' @export -setGeneric("sample_frac", +setGeneric("sample", function(x, withReplacement, fraction, seed) { - standardGeneric("sample_frac") - }) + standardGeneric("sample") + }) -#' @rdname sampleDF +#' @rdname sample #' @export -setGeneric("sampleDF", +setGeneric("sample_frac", function(x, withReplacement, fraction, seed) { - standardGeneric("sampleDF") - }) + standardGeneric("sample_frac") + }) #' @rdname saveAsParquetFile #' @export @@ -480,7 +480,11 @@ setGeneric("saveAsTable", function(df, tableName, source, mode, ...) { standardGeneric("saveAsTable") }) -#' @rdname saveAsTable +#' @rdname write.df +#' @export +setGeneric("write.df", function(df, path, source, mode, ...) { standardGeneric("write.df") }) + +#' @rdname write.df #' @export setGeneric("saveDF", function(df, path, source, mode, ...) 
{ standardGeneric("saveDF") }) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 99c28830c6237..1109e8fdba3fd 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -209,18 +209,18 @@ test_that("registerTempTable() results in a queryable table and sql() results in }) test_that("insertInto() on a registered table", { - df <- loadDF(sqlCtx, jsonPath, "json") - saveDF(df, parquetPath, "parquet", "overwrite") - dfParquet <- loadDF(sqlCtx, parquetPath, "parquet") + df <- read.df(sqlCtx, jsonPath, "json") + write.df(df, parquetPath, "parquet", "overwrite") + dfParquet <- read.df(sqlCtx, parquetPath, "parquet") lines <- c("{\"name\":\"Bob\", \"age\":24}", "{\"name\":\"James\", \"age\":35}") jsonPath2 <- tempfile(pattern="jsonPath2", fileext=".tmp") parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet") writeLines(lines, jsonPath2) - df2 <- loadDF(sqlCtx, jsonPath2, "json") - saveDF(df2, parquetPath2, "parquet", "overwrite") - dfParquet2 <- loadDF(sqlCtx, parquetPath2, "parquet") + df2 <- read.df(sqlCtx, jsonPath2, "json") + write.df(df2, parquetPath2, "parquet", "overwrite") + dfParquet2 <- read.df(sqlCtx, parquetPath2, "parquet") registerTempTable(dfParquet, "table1") insertInto(dfParquet2, "table1") @@ -421,12 +421,12 @@ test_that("distinct() on DataFrames", { expect_true(count(uniques) == 3) }) -test_that("sampleDF on a DataFrame", { +test_that("sample on a DataFrame", { df <- jsonFile(sqlCtx, jsonPath) - sampled <- sampleDF(df, FALSE, 1.0) + sampled <- sample(df, FALSE, 1.0) expect_equal(nrow(collect(sampled)), count(df)) expect_true(inherits(sampled, "DataFrame")) - sampled2 <- sampleDF(df, FALSE, 0.1) + sampled2 <- sample(df, FALSE, 0.1) expect_true(count(sampled2) < 3) # Also test sample_frac @@ -491,16 +491,16 @@ test_that("column calculation", { expect_true(count(df2) == 3) }) -test_that("load() from json file", { - df <- loadDF(sqlCtx, jsonPath, "json") +test_that("read.df() from json file", { + df <- read.df(sqlCtx, jsonPath, "json") expect_true(inherits(df, "DataFrame")) expect_true(count(df) == 3) }) -test_that("save() as parquet file", { - df <- loadDF(sqlCtx, jsonPath, "json") - saveDF(df, parquetPath, "parquet", mode="overwrite") - df2 <- loadDF(sqlCtx, parquetPath, "parquet") +test_that("write.df() as parquet file", { + df <- read.df(sqlCtx, jsonPath, "json") + write.df(df, parquetPath, "parquet", mode="overwrite") + df2 <- read.df(sqlCtx, parquetPath, "parquet") expect_true(inherits(df2, "DataFrame")) expect_true(count(df2) == 3) }) @@ -670,7 +670,7 @@ test_that("unionAll(), except(), and intersect() on a DataFrame", { "{\"name\":\"James\", \"age\":35}") jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp") writeLines(lines, jsonPath2) - df2 <- loadDF(sqlCtx, jsonPath2, "json") + df2 <- read.df(sqlCtx, jsonPath2, "json") unioned <- arrange(unionAll(df, df2), df$age) expect_true(inherits(unioned, "DataFrame")) @@ -712,9 +712,9 @@ test_that("mutate() and rename()", { expect_true(columns(newDF2)[1] == "newerAge") }) -test_that("saveDF() on DataFrame and works with parquetFile", { +test_that("write.df() on DataFrame and works with parquetFile", { df <- jsonFile(sqlCtx, jsonPath) - saveDF(df, parquetPath, "parquet", mode="overwrite") + write.df(df, parquetPath, "parquet", mode="overwrite") parquetDF <- parquetFile(sqlCtx, parquetPath) expect_true(inherits(parquetDF, "DataFrame")) expect_equal(count(df), count(parquetDF)) @@ -722,9 +722,9 @@ test_that("saveDF() on DataFrame and works with 
parquetFile", { test_that("parquetFile works with multiple input paths", { df <- jsonFile(sqlCtx, jsonPath) - saveDF(df, parquetPath, "parquet", mode="overwrite") + write.df(df, parquetPath, "parquet", mode="overwrite") parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet") - saveDF(df, parquetPath2, "parquet", mode="overwrite") + write.df(df, parquetPath2, "parquet", mode="overwrite") parquetDF <- parquetFile(sqlCtx, parquetPath, parquetPath2) expect_true(inherits(parquetDF, "DataFrame")) expect_true(count(parquetDF) == count(df)*2) From 98195c3031fe60683bb25840f135458d5d0e52c5 Mon Sep 17 00:00:00 2001 From: linweizhong <linweizhong@huawei.com> Date: Tue, 12 May 2015 23:55:44 -0700 Subject: [PATCH 135/320] [SPARK-7526] [SPARKR] Specify ip of RBackend, MonitorServer and RRDD Socket server These R process only used to communicate with JVM process on local, so binding to localhost is more reasonable then wildcard ip. Author: linweizhong <linweizhong@huawei.com> Closes #6053 from Sephiroth-Lin/spark-7526 and squashes the following commits: 5303af7 [linweizhong] bind to localhost rather than wildcard ip --- core/src/main/scala/org/apache/spark/api/r/RBackend.scala | 6 +++--- core/src/main/scala/org/apache/spark/api/r/RRDD.scala | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala index 3a2c94bd9d875..0a91977928cee 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala @@ -18,7 +18,7 @@ package org.apache.spark.api.r import java.io.{DataOutputStream, File, FileOutputStream, IOException} -import java.net.{InetSocketAddress, ServerSocket} +import java.net.{InetAddress, InetSocketAddress, ServerSocket} import java.util.concurrent.TimeUnit import io.netty.bootstrap.ServerBootstrap @@ -65,7 +65,7 @@ private[spark] class RBackend { } }) - channelFuture = bootstrap.bind(new InetSocketAddress(0)) + channelFuture = bootstrap.bind(new InetSocketAddress("localhost", 0)) channelFuture.syncUninterruptibly() channelFuture.channel().localAddress().asInstanceOf[InetSocketAddress].getPort() } @@ -101,7 +101,7 @@ private[spark] object RBackend extends Logging { try { // bind to random port val boundPort = sparkRBackend.init() - val serverSocket = new ServerSocket(0, 1) + val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost")) val listenPort = serverSocket.getLocalPort() // tell the R process via temporary file diff --git a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala index 6fea5e1144f2f..06247f7e8b78c 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala @@ -18,7 +18,7 @@ package org.apache.spark.api.r import java.io._ -import java.net.ServerSocket +import java.net.{InetAddress, ServerSocket} import java.util.{Map => JMap} import scala.collection.JavaConversions._ @@ -55,7 +55,7 @@ private abstract class BaseRRDD[T: ClassTag, U: ClassTag]( val parentIterator = firstParent[T].iterator(partition, context) // we expect two connections - val serverSocket = new ServerSocket(0, 2) + val serverSocket = new ServerSocket(0, 2, InetAddress.getByName("localhost")) val listenPort = serverSocket.getLocalPort() // The stdout/stderr is shared by multiple tasks, because we use one daemon @@ -414,7 +414,7 @@ private[r] object RRDD { 
synchronized { if (daemonChannel == null) { // we expect one connections - val serverSocket = new ServerSocket(0, 1) + val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost")) val daemonPort = serverSocket.getLocalPort errThread = createRProcess(rLibDir, daemonPort, "daemon.R") // the socket used to send out the input of task From 50c72708015fba15d0e78946f1f4ec262776bc38 Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI <tsudukim@oss.nttdata.co.jp> Date: Wed, 13 May 2015 09:43:40 +0100 Subject: [PATCH 136/320] [SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path escape spaces in the arguments. Author: Masayoshi TSUZUKI <tsudukim@oss.nttdata.co.jp> Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Closes #5447 from tsudukim/feature/SPARK-6568-2 and squashes the following commits: 3f9a188 [Masayoshi TSUZUKI] modified some errors. ed46047 [Masayoshi TSUZUKI] avoid scalastyle errors. 1784239 [Masayoshi TSUZUKI] removed Utils.formatPath. e03f289 [Masayoshi TSUZUKI] removed testWindows from Utils.resolveURI and Utils.resolveURIs. replaced SystemUtils.IS_OS_WINDOWS to Utils.isWindows. removed Utils.formatPath from PythonRunner.scala. 84c33d0 [Masayoshi TSUZUKI] - use resolveURI in nonLocalPaths - run tests for Windows path only on Windows 016128d [Masayoshi TSUZUKI] fixed to use File.toURI() 2c62e3b [Masayoshi TSUZUKI] Merge pull request #1 from sarutak/SPARK-6568-2 7019a8a [Masayoshi TSUZUKI] Merge branch 'master' of https://github.com/apache/spark into feature/SPARK-6568-2 45946ee [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-6568-2 10f1c73 [Kousuke Saruta] Added a comment 93c3c40 [Kousuke Saruta] Merge branch 'classpath-handling-fix' of github.com:sarutak/spark into SPARK-6568-2 649da82 [Kousuke Saruta] Fix classpath handling c7ba6a7 [Masayoshi TSUZUKI] [SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path --- .../apache/spark/deploy/PythonRunner.scala | 23 ++++--- .../scala/org/apache/spark/util/Utils.scala | 43 ++++-------- .../spark/deploy/PythonRunnerSuite.scala | 31 +++++---- .../org/apache/spark/util/UtilsSuite.scala | 67 ++++++++++++------- .../org/apache/spark/repl/SparkILoop.scala | 5 +- 5 files changed, 89 insertions(+), 80 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 53e18c4bcec23..c2ed43a5397d6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -18,9 +18,11 @@ package org.apache.spark.deploy import java.net.URI +import java.io.File import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConversions._ +import scala.util.Try import org.apache.spark.api.python.PythonUtils import org.apache.spark.util.{RedirectThread, Utils} @@ -81,16 +83,13 @@ object PythonRunner { throw new IllegalArgumentException("Launching Python applications through " + s"spark-submit is currently only supported for local files: $path") } - val windows = Utils.isWindows || testWindows - var formattedPath = if (windows) Utils.formatWindowsPath(path) else path - - // Strip the URI scheme from the path - formattedPath = - new URI(formattedPath).getScheme match { - case null => formattedPath - case Utils.windowsDrive(d) if windows => formattedPath - case _ => new URI(formattedPath).getPath - } + // get path when scheme is file. 
+ val uri = Try(new URI(path)).getOrElse(new File(path).toURI) + var formattedPath = uri.getScheme match { + case null => path + case "file" | "local" => uri.getPath + case _ => null + } // Guard against malformed paths potentially throwing NPE if (formattedPath == null) { @@ -99,7 +98,9 @@ object PythonRunner { // In Windows, the drive should not be prefixed with "/" // For instance, python does not understand "/C:/path/to/sheep.py" - formattedPath = if (windows) formattedPath.stripPrefix("/") else formattedPath + if (Utils.isWindows && formattedPath.matches("/[a-zA-Z]:/.*")) { + formattedPath = formattedPath.stripPrefix("/") + } formattedPath } diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index be4db02ab86d0..48843b4ae57c6 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1704,11 +1704,6 @@ private[spark] object Utils extends Logging { */ val windowsDrive = "([a-zA-Z])".r - /** - * Format a Windows path such that it can be safely passed to a URI. - */ - def formatWindowsPath(path: String): String = path.replace("\\", "/") - /** * Indicates whether Spark is currently running unit tests. */ @@ -1806,37 +1801,24 @@ private[spark] object Utils extends Logging { * If the supplied path does not contain a scheme, or is a relative path, it will be * converted into an absolute path with a file:// scheme. */ - def resolveURI(path: String, testWindows: Boolean = false): URI = { - - // In Windows, the file separator is a backslash, but this is inconsistent with the URI format - val windows = isWindows || testWindows - val formattedPath = if (windows) formatWindowsPath(path) else path - - val uri = new URI(formattedPath) - if (uri.getPath == null) { - throw new IllegalArgumentException(s"Given path is malformed: $uri") - } - - Option(uri.getScheme) match { - case Some(windowsDrive(d)) if windows => - new URI("file:/" + uri.toString.stripPrefix("/")) - case None => - // Preserve fragments for HDFS file name substitution (denoted by "#") - // For instance, in "abc.py#xyz.py", "xyz.py" is the name observed by the application - val fragment = uri.getFragment - val part = new File(uri.getPath).toURI - new URI(part.getScheme, part.getPath, fragment) - case Some(other) => - uri + def resolveURI(path: String): URI = { + try { + val uri = new URI(path) + if (uri.getScheme() != null) { + return uri + } + } catch { + case e: URISyntaxException => } + new File(path).getAbsoluteFile().toURI() } /** Resolve a comma-separated list of paths. 
*/ - def resolveURIs(paths: String, testWindows: Boolean = false): String = { + def resolveURIs(paths: String): String = { if (paths == null || paths.trim.isEmpty) { "" } else { - paths.split(",").map { p => Utils.resolveURI(p, testWindows) }.mkString(",") + paths.split(",").map { p => Utils.resolveURI(p) }.mkString(",") } } @@ -1847,8 +1829,7 @@ private[spark] object Utils extends Logging { Array.empty } else { paths.split(",").filter { p => - val formattedPath = if (windows) formatWindowsPath(p) else p - val uri = new URI(formattedPath) + val uri = resolveURI(p) Option(uri.getScheme).getOrElse("file") match { case windowsDrive(d) if windows => false case "local" | "file" => false diff --git a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala index bb6251fb4bfbe..80f2cc02516fe 100644 --- a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.deploy import org.scalatest.FunSuite +import org.apache.spark.util.Utils + class PythonRunnerSuite extends FunSuite { // Test formatting a single path to be added to the PYTHONPATH @@ -28,10 +30,14 @@ class PythonRunnerSuite extends FunSuite { assert(PythonRunner.formatPath("file:///spark.py") === "/spark.py") assert(PythonRunner.formatPath("local:/spark.py") === "/spark.py") assert(PythonRunner.formatPath("local:///spark.py") === "/spark.py") - assert(PythonRunner.formatPath("C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py") - assert(PythonRunner.formatPath("/C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py") - assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) === - "C:/a/b/spark.py") + if (Utils.isWindows) { + assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) === + "C:/a/b/spark.py") + assert(PythonRunner.formatPath("C:\\a\\b\\spark.py", testWindows = true) === + "C:/a/b/spark.py") + assert(PythonRunner.formatPath("C:\\a b\\spark.py", testWindows = true) === + "C:/a b/spark.py") + } intercept[IllegalArgumentException] { PythonRunner.formatPath("one:two") } intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:s3:xtremeFS") } intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:/path/to/some.py") } @@ -45,14 +51,15 @@ class PythonRunnerSuite extends FunSuite { Array("/app.py", "/spark.py")) assert(PythonRunner.formatPaths("me.py,file:/you.py,local:/we.py") === Array("me.py", "/you.py", "/we.py")) - assert(PythonRunner.formatPaths("C:/a/b/spark.py", testWindows = true) === - Array("C:/a/b/spark.py")) - assert(PythonRunner.formatPaths("/C:/a/b/spark.py", testWindows = true) === - Array("C:/a/b/spark.py")) - assert(PythonRunner.formatPaths("C:/free.py,pie.py", testWindows = true) === - Array("C:/free.py", "pie.py")) - assert(PythonRunner.formatPaths("lovely.py,C:/free.py,file:/d:/fry.py", testWindows = true) === - Array("lovely.py", "C:/free.py", "d:/fry.py")) + if (Utils.isWindows) { + assert(PythonRunner.formatPaths("C:\\a\\b\\spark.py", testWindows = true) === + Array("C:/a/b/spark.py")) + assert(PythonRunner.formatPaths("C:\\free.py,pie.py", testWindows = true) === + Array("C:/free.py", "pie.py")) + assert(PythonRunner.formatPaths("lovely.py,C:\\free.py,file:/d:/fry.py", + testWindows = true) === + Array("lovely.py", "C:/free.py", "d:/fry.py")) + } intercept[IllegalArgumentException] { PythonRunner.formatPaths("one:two,three") } 
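// As an aside, the reworked Utils.resolveURI above boils down to "parse as a URI, otherwise
// treat the string as a local file path". A standalone sketch of the same idea with a couple
// of expected results (a paraphrase for illustration, not the patch's exact code):
import java.io.File
import java.net.{URI, URISyntaxException}

def resolveURISketch(path: String): URI = {
  try {
    val uri = new URI(path)
    if (uri.getScheme != null) return uri
  } catch {
    case _: URISyntaxException => // not a well-formed URI; fall through to the file branch
  }
  new File(path).getAbsoluteFile.toURI
}

resolveURISketch("hdfs:/root/spark.jar") // scheme present, returned unchanged
resolveURISketch("path to/file.txt")     // becomes file:/<cwd>/path%20to/file.txt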
intercept[IllegalArgumentException] { PythonRunner.formatPaths("two,three,four:five:six") } intercept[IllegalArgumentException] { PythonRunner.formatPaths("hdfs:/some.py,foo.py") } diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 651ead6ff1de2..61152c29a681f 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -367,51 +367,58 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging { } test("resolveURI") { - def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = { + def assertResolves(before: String, after: String): Unit = { // This should test only single paths assume(before.split(",").length === 1) // Repeated invocations of resolveURI should yield the same result - def resolve(uri: String): String = Utils.resolveURI(uri, testWindows).toString + def resolve(uri: String): String = Utils.resolveURI(uri).toString assert(resolve(after) === after) assert(resolve(resolve(after)) === after) assert(resolve(resolve(resolve(after))) === after) // Also test resolveURIs with single paths - assert(new URI(Utils.resolveURIs(before, testWindows)) === new URI(after)) - assert(new URI(Utils.resolveURIs(after, testWindows)) === new URI(after)) + assert(new URI(Utils.resolveURIs(before)) === new URI(after)) + assert(new URI(Utils.resolveURIs(after)) === new URI(after)) } - val cwd = System.getProperty("user.dir") + val rawCwd = System.getProperty("user.dir") + val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd assertResolves("hdfs:/root/spark.jar", "hdfs:/root/spark.jar") assertResolves("hdfs:///root/spark.jar#app.jar", "hdfs:/root/spark.jar#app.jar") assertResolves("spark.jar", s"file:$cwd/spark.jar") - assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar#app.jar") - assertResolves("C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt", testWindows = true) - intercept[IllegalArgumentException] { Utils.resolveURI("file:foo") } - intercept[IllegalArgumentException] { Utils.resolveURI("file:foo:baby") } + assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar%23app.jar") + assertResolves("path to/file.txt", s"file:$cwd/path%20to/file.txt") + if (Utils.isWindows) { + assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt") + assertResolves("C:\\path to\\file.txt", "file:/C:/path%20to/file.txt") + } + assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt") + assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt") + assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt") + assertResolves("file:foo", s"file:foo") + assertResolves("file:foo:baby", s"file:foo:baby") } test("resolveURIs with multiple paths") { - def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = { + def assertResolves(before: String, after: String): Unit = { assume(before.split(",").length > 1) - assert(Utils.resolveURIs(before, testWindows) === after) - assert(Utils.resolveURIs(after, testWindows) === after) + 
assert(Utils.resolveURIs(before) === after) + assert(Utils.resolveURIs(after) === after) // Repeated invocations of resolveURIs should yield the same result - def resolve(uri: String): String = Utils.resolveURIs(uri, testWindows) + def resolve(uri: String): String = Utils.resolveURIs(uri) assert(resolve(after) === after) assert(resolve(resolve(after)) === after) assert(resolve(resolve(resolve(after))) === after) } - val cwd = System.getProperty("user.dir") + val rawCwd = System.getProperty("user.dir") + val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd assertResolves("jar1,jar2", s"file:$cwd/jar1,file:$cwd/jar2") assertResolves("file:/jar1,file:/jar2", "file:/jar1,file:/jar2") assertResolves("hdfs:/jar1,file:/jar2,jar3", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3") - assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5", - s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4#jar5") - assertResolves("hdfs:/jar1,file:/jar2,jar3,C:\\pi.py#py.pi", - s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py#py.pi", testWindows = true) + assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5,path to/jar6", + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4%23jar5,file:$cwd/path%20to/jar6") + if (Utils.isWindows) { + assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""", + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4") + } } test("nonLocalPaths") { @@ -426,6 +433,8 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging { assert(Utils.nonLocalPaths("local:/spark.jar,file:/smart.jar,family.py") === Array.empty) assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar") === Array("hdfs:/spark.jar", "s3:/smart.jar")) + assert(Utils.nonLocalPaths("hdfs:/spark.jar,path to/a.jar,s3:/smart.jar") === + Array("hdfs:/spark.jar", "s3:/smart.jar")) assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar,local.py,file:/hello/pi.py") === Array("hdfs:/spark.jar", "s3:/smart.jar")) assert(Utils.nonLocalPaths("local.py,hdfs:/spark.jar,file:/hello/pi.py,s3:/smart.jar") === @@ -547,7 +556,12 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging { val targetDir = new File(tempDir, "target-dir") Files.write("some text", sourceFile, UTF_8) - val path = new Path("file://" + sourceDir.getAbsolutePath) + val path = + if (Utils.isWindows) { + new Path("file:/" + sourceDir.getAbsolutePath.replace("\\", "/")) + } else { + new Path("file://" + sourceDir.getAbsolutePath) + } val conf = new Configuration() val fs = Utils.getHadoopFileSystem(path.toString, conf) @@ -567,7 +581,12 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging { val destInnerFile = new File(destInnerDir, sourceFile.getName) assert(destInnerFile.isFile()) - val filePath = new Path("file://" + sourceFile.getAbsolutePath) + val filePath = + if (Utils.isWindows) { + new Path("file:/" + sourceFile.getAbsolutePath.replace("\\", "/")) + } else { + new Path("file://" + sourceFile.getAbsolutePath) + } val testFileDir = new File(tempDir, "test-filename") val testFileName = "testFName" val testFilefs = Utils.getHadoopFileSystem(filePath.toString, conf) diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala index 488f3a9f33256..2b235525250c2 100644 --- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -206,7 +206,8 
@@ class SparkILoop( // e.g. file:/C:/my/path.jar -> C:/my/path.jar SparkILoop.getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") } } else { - SparkILoop.getAddedJars + // We need new URI(jar).getPath here for the case that `jar` includes encoded white space (%20). + SparkILoop.getAddedJars.map { jar => new URI(jar).getPath } } // work around for Scala bug val totalClassPath = addedJars.foldLeft( @@ -1109,7 +1110,7 @@ object SparkILoop extends Logging { if (settings.classpath.isDefault) settings.classpath.value = sys.props("java.class.path") - getAddedJars.foreach(settings.classpath.append(_)) + getAddedJars.map(jar => new URI(jar).getPath).foreach(settings.classpath.append(_)) repl process settings } From 10c546e9d42a0f3fbf45c919e74f62c548ca8347 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Wed, 13 May 2015 07:35:55 -0700 Subject: [PATCH 137/320] [SPARK-7599] [SQL] Don't restrict customized output committers to be subclasses of FileOutputCommitter Author: Cheng Lian <lian@databricks.com> Closes #6118 from liancheng/spark-7599 and squashes the following commits: 31e1bd6 [Cheng Lian] Don't restrict customized output committers to be subclasses of FileOutputCommitter --- .../apache/spark/sql/sources/commands.scala | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index 8372d2c34acc7..fe8be5b7feeb9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -244,7 +244,7 @@ private[sql] abstract class BaseWriterContainer( @transient private val jobContext: JobContext = job // The following fields are initialized and used on both driver and executor side. - @transient protected var outputCommitter: FileOutputCommitter = _ + @transient protected var outputCommitter: OutputCommitter = _ @transient private var jobId: JobID = _ @transient private var taskId: TaskID = _ @transient private var taskAttemptId: TaskAttemptID = _ @@ -282,14 +282,18 @@ private[sql] abstract class BaseWriterContainer( initWriters() } - private def newOutputCommitter(context: TaskAttemptContext): FileOutputCommitter = { - outputFormatClass.newInstance().getOutputCommitter(context) match { - case f: FileOutputCommitter => f - case f => sys.error( - s"FileOutputCommitter or its subclass is expected, but got a ${f.getClass.getName}.") + protected def getWorkPath: String = { + outputCommitter match { + // FileOutputCommitter writes to a temporary location returned by `getWorkPath`. 
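// For instance, a committer that publishes task output directly to its final location
// (a hypothetical sketch, not taken from this patch) extends OutputCommitter without being
// a FileOutputCommitter and has no work path at all:
//
//   class DirectOutputCommitter extends OutputCommitter {
//     override def setupJob(jobContext: JobContext): Unit = ()
//     override def setupTask(taskContext: TaskAttemptContext): Unit = ()
//     override def needsTaskCommit(taskContext: TaskAttemptContext): Boolean = false
//     override def commitTask(taskContext: TaskAttemptContext): Unit = ()
//     override def abortTask(taskContext: TaskAttemptContext): Unit = ()
//   }
//
// which is why the match below falls back to the relation's `outputPath` for anything that
// is not a FileOutputCommitter.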
+ case f: FileOutputCommitter => f.getWorkPath.toString + case _ => outputPath } } + private def newOutputCommitter(context: TaskAttemptContext): OutputCommitter = { + outputFormatClass.newInstance().getOutputCommitter(context) + } + private def setupIDs(jobId: Int, splitId: Int, attemptId: Int): Unit = { this.jobId = SparkHadoopWriter.createJobID(new Date, jobId) this.taskId = new TaskID(this.jobId, true, splitId) @@ -339,7 +343,7 @@ private[sql] class DefaultWriterContainer( override protected def initWriters(): Unit = { writer = outputWriterClass.newInstance() - writer.init(outputCommitter.getWorkPath.toString, dataSchema, taskAttemptContext) + writer.init(getWorkPath, dataSchema, taskAttemptContext) } override def outputWriterForRow(row: Row): OutputWriter = writer @@ -381,7 +385,7 @@ private[sql] class DynamicPartitionWriterContainer( }.mkString outputWriters.getOrElseUpdate(partitionPath, { - val path = new Path(outputCommitter.getWorkPath, partitionPath.stripPrefix(Path.SEPARATOR)) + val path = new Path(getWorkPath, partitionPath.stripPrefix(Path.SEPARATOR)) val writer = outputWriterClass.newInstance() writer.init(path.toString, dataSchema, taskAttemptContext) writer From b061bd517a3dc26e7f37a334f49c3465d98334c6 Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Wed, 13 May 2015 23:36:19 +0800 Subject: [PATCH 138/320] [SQL] In InsertIntoFSBasedRelation.insert, log cause before abort job/task. We need to add a log entry before calling `abortTask`/`abortJob`. Otherwise, an exception from `abortTask`/`abortJob` will shadow the real cause. cc liancheng Author: Yin Huai <yhuai@databricks.com> Closes #6105 from yhuai/logCause and squashes the following commits: 8dfe0d8 [Yin Huai] Log cause. --- .../src/main/scala/org/apache/spark/sql/sources/commands.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index fe8be5b7feeb9..a294297677d1a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -121,6 +121,7 @@ private[sql] case class InsertIntoFSBasedRelation( writerContainer.commitJob() relation.refresh() } catch { case cause: Throwable => + logError("Aborting job.", cause) writerContainer.abortJob() throw new SparkException("Job aborted.", cause) } @@ -143,6 +144,7 @@ private[sql] case class InsertIntoFSBasedRelation( } writerContainer.commitTask() } catch { case cause: Throwable => + logError("Aborting task.", cause) writerContainer.abortTask() throw new SparkException("Task failed while writing rows.", cause) } From aa6ba3f2166edcc8bcda3abc70482fa8605e83b7 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Wed, 13 May 2015 23:40:13 +0800 Subject: [PATCH 139/320] [MINOR] [SQL] Removes debugging println Author: Cheng Lian <lian@databricks.com> Closes #6123 from liancheng/remove-println and squashes the following commits: 03356b6 [Cheng Lian] Removes debugging println --- .../org/apache/spark/sql/sources/FSBasedRelationSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala index 415b1cd168848..e8b48a0db1c79 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala +++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala @@ -509,8 +509,6 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { path.makeQualified(fs.getUri, fs.getWorkingDirectory).toString } - println(df.queryExecution) - val actualPaths = df.queryExecution.analyzed.collectFirst { case LogicalRelation(relation: FSBasedRelation) => relation.paths.toSet From 0da254fb2903c01e059fa7d0dc81df5740312b35 Mon Sep 17 00:00:00 2001 From: Cheng Hao <hao.cheng@intel.com> Date: Thu, 14 May 2015 00:14:59 +0800 Subject: [PATCH 140/320] [SPARK-6734] [SQL] Add UDTF.close support in Generate Some third-party UDTF extensions generate additional rows in the "GenericUDTF.close()" method, which is supported / documented by Hive. https://cwiki.apache.org/confluence/display/Hive/DeveloperGuide+UDTF However, Spark SQL ignores the "GenericUDTF.close()", and it causes bug while porting job from Hive to Spark SQL. Author: Cheng Hao <hao.cheng@intel.com> Closes #5383 from chenghao-intel/udtf_close and squashes the following commits: 98b4e4b [Cheng Hao] Support UDTF.close --- .../sql/catalyst/expressions/generators.scala | 6 +++ .../apache/spark/sql/execution/Generate.scala | 38 +++++++++++++----- .../org/apache/spark/sql/hive/hiveUdfs.scala | 18 +++++++-- sql/hive/src/test/resources/TestUDTF.jar | Bin 0 -> 1328 bytes ...l Views-0-ac5c96224a534f07b49462ad76620678 | 2 + ... SELECT-0-517f834fef35b896ec64399f42b2a151 | 2 + .../sql/hive/execution/HiveQuerySuite.scala | 21 ++++++++++ 7 files changed, 74 insertions(+), 13 deletions(-) create mode 100644 sql/hive/src/test/resources/TestUDTF.jar create mode 100644 sql/hive/src/test/resources/golden/Test UDTF.close in Lateral Views-0-ac5c96224a534f07b49462ad76620678 create mode 100644 sql/hive/src/test/resources/golden/Test UDTF.close in SELECT-0-517f834fef35b896ec64399f42b2a151 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index 9a6cb048af5ad..747a47bdde953 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -56,6 +56,12 @@ abstract class Generator extends Expression { /** Should be implemented by child classes to perform specific Generators. */ override def eval(input: Row): TraversableOnce[Row] + + /** + * Notifies that there are no more rows to process, clean up code, and additional + * rows can be made here. + */ + def terminate(): TraversableOnce[Row] = Nil } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala index 08d9079335132..dd02c1f4573bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala @@ -21,6 +21,18 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions._ +/** + * For lazy computing, be sure the generator.terminate() called in the very last + * TODO reusing the CompletionIterator? 
+ */ +private[execution] sealed case class LazyIterator(func: () => TraversableOnce[Row]) + extends Iterator[Row] { + + lazy val results = func().toIterator + override def hasNext: Boolean = results.hasNext + override def next(): Row = results.next() +} + /** * :: DeveloperApi :: * Applies a [[catalyst.expressions.Generator Generator]] to a stream of input rows, combining the @@ -47,27 +59,33 @@ case class Generate( val boundGenerator = BindReferences.bindReference(generator, child.output) protected override def doExecute(): RDD[Row] = { + // boundGenerator.terminate() should be triggered after all of the rows in the partition if (join) { child.execute().mapPartitions { iter => - val nullValues = Seq.fill(generator.elementTypes.size)(Literal(null)) - // Used to produce rows with no matches when outer = true. - val outerProjection = - newProjection(child.output ++ nullValues, child.output) - - val joinProjection = newProjection(output, output) + val generatorNullRow = Row.fromSeq(Seq.fill[Any](generator.elementTypes.size)(null)) val joinedRow = new JoinedRow - iter.flatMap {row => + iter.flatMap { row => + // we should always set the left (child output) + joinedRow.withLeft(row) val outputRows = boundGenerator.eval(row) if (outer && outputRows.isEmpty) { - outerProjection(row) :: Nil + joinedRow.withRight(generatorNullRow) :: Nil } else { - outputRows.map(or => joinProjection(joinedRow(row, or))) + outputRows.map(or => joinedRow.withRight(or)) } + } ++ LazyIterator(() => boundGenerator.terminate()).map { row => + // we leave the left side as the last element of its child output + // keep it the same as Hive does + joinedRow.withRight(row) } } } else { - child.execute().mapPartitions(iter => iter.flatMap(row => boundGenerator.eval(row))) + child.execute().mapPartitions { iter => + iter.flatMap(row => boundGenerator.eval(row)) ++ + LazyIterator(() => boundGenerator.terminate()) + } } } } + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index fd0b6f058595d..bc6b3a2d58c38 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -483,7 +483,11 @@ private[hive] case class HiveGenericUdtf( extends Generator with HiveInspectors { @transient - protected lazy val function: GenericUDTF = funcWrapper.createFunction() + protected lazy val function: GenericUDTF = { + val fun: GenericUDTF = funcWrapper.createFunction() + fun.setCollector(collector) + fun + } @transient protected lazy val inputInspectors = children.map(toInspector) @@ -494,6 +498,9 @@ private[hive] case class HiveGenericUdtf( @transient protected lazy val udtInput = new Array[AnyRef](children.length) + @transient + protected lazy val collector = new UDTFCollector + lazy val elementTypes = outputInspector.getAllStructFieldRefs.map { field => (inspectorToDataType(field.getFieldObjectInspector), true) } @@ -502,8 +509,7 @@ private[hive] case class HiveGenericUdtf( outputInspector // Make sure initialized. val inputProjection = new InterpretedProjection(children) - val collector = new UDTFCollector - function.setCollector(collector) + function.process(wrap(inputProjection(input), inputInspectors, udtInput)) collector.collectRows() } @@ -525,6 +531,12 @@ private[hive] case class HiveGenericUdtf( } } + override def terminate(): TraversableOnce[Row] = { + outputInspector // Make sure initialized. 
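// The kind of extension this enables, sketched as a UDTF that only emits rows from close()
// (illustrative only — the GenericUDTFCount2 used by the tests ships inside TestUDTF.jar and
// may differ in detail):
//
//   import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF
//   import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorFactory, StructObjectInspector}
//   import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
//
//   class CountInClose extends GenericUDTF {
//     private var count = 0
//     override def initialize(argOIs: Array[ObjectInspector]): StructObjectInspector =
//       ObjectInspectorFactory.getStandardStructObjectInspector(
//         java.util.Arrays.asList("count"),
//         java.util.Arrays.asList[ObjectInspector](PrimitiveObjectInspectorFactory.javaIntObjectInspector))
//     override def process(args: Array[AnyRef]): Unit = { count += 1 }
//     override def close(): Unit = forward(Array[AnyRef](Int.box(count))) // rows are produced only here
//   }
//
// Without this terminate() hook those close()-time rows would be dropped.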
+ function.close() + collector.collectRows() + } + override def toString: String = { s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" } diff --git a/sql/hive/src/test/resources/TestUDTF.jar b/sql/hive/src/test/resources/TestUDTF.jar new file mode 100644 index 0000000000000000000000000000000000000000..514f2d5d26fd358ad5647e0e75edb8ce77b69e30 GIT binary patch literal 1328 zcmWIWW@Zs#;Nak3xSr7K#()Gk8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1g<Oz^PdT zr~<Cp*U`_@%{4eg&)4m<@0rs+-nx1hdA)VD&Yd~GImqCO@q?#DdS1Rdp1v1LSFv!; zcrAHkwxmYJ4{=qs=S#FKo{Nh<5m%kYFg5#A(Z?c2up{ouoD6jaTHyl32uGZcMD>9+ zP&U6PT|coPF*zeuzqlZ=C|kd{Fh@Tlvn*9VwIVgSv?Mb>Pv1Q?FSRH$In*V@%{jj` zuf#|%IVZ8WcxlK>KjA=;e|ow+J<2CuHJwts5Oyj+z|m2lF^e}kXqra-9v}VG4QH-I z3e+&hyZ7o`e8K!9;8^kPO%YMTk5r1!#h&{#_wB!5f8VM%C>{yDve5geZOQcoVzyjW zs{AWm4sj`IiIjvs-uI-|v|~|xYaioEfnb9b2_6TT+q@2c)9qham^b@<k?rBNTjp)B znJ9KnqIp-Q_4}QZqwd^LoA|ZZs6>gU-(}S?-_u{zB$yU{&~h%g<;U5!%kr`i_rG&( z1=D`Ixp}xt-d0_A?wzF4>zUi0YDp~H8)9C0?2p5AdFR)~+a_PDoF*OZbpMg&?Q^f9 zpRKzP`*6vT1$oc^%$hb^HO-n;rgQS&h#!-Fd7l5syZ%O3Q#+5>wT-+Fck$>h6k2#X z<LZ*P)@>cLjSfwFen_Y<bIlo@<I7XS*ncmb)RwkIL*k-Am%G%fiw6qU-k8<qW|CRK zb1EbG-KFnmdhDKB)YP9VS?S<-_}mebp2wX{oV=4ORwZ~HHulM$aZu;*hn)xR>XhGm z^=`M1UD-x;p7>=k`xz!GhN&ekd9&Q?Yf0SWJ!;Oue2X$)ds<E2dir|Bf;}r`AAS`1 zzT;mLpI>xC;)*?8E^F7HIkJ4tmi@{)@8^X5ZB1KR^!8JazL<IHPN9dB-mVe3HFKe5 z$gTy9^R&u;&q{n-w0COh8QE94LJ?MxqNY)Eym$FeSZdD{RdRkq$;mpq&p$UUaXoeD zOug?7%_rZse3&G+J!r~>Nx?@F#6Qh=oqF?F^3mU?`(NA31m#>^_H^wjyAyleIZG~N zN3cs-gj`tdxtaav#WjMwmn?hLF23F(t;XrL$Fhh;j8kUuBZfwg5A03sEB^S1^O$#B zQ*zkSJH>jz!pVO5lRCTRbb9WZtQMslA1J9H<l*+@lR&8YpECi>Ga0HDESi2z@s87s ze>SVae&s**JNU+?Q1kX**;A{PL(Yr;aEf<6t9WJW3-OqVd-SV#-d<kWEEXcvyGZJ< z$UeViY#ilpuAGehb^AhGfspE(og$t~8l7W!_nln+fM??@VF|_L#r2AB7nHJE{Rihb zq1!z{oXiXiv)Dj6j*&@(0kzbC6$zkH0~NrF0Z<u&t`#{WgUSp9umv*VT9HZ+WD`Ic h9@$}_437YZfJ_Rqe}FeD(669kkD1{akTwB(3;-%Z1494+ literal 0 HcmV?d00001 diff --git a/sql/hive/src/test/resources/golden/Test UDTF.close in Lateral Views-0-ac5c96224a534f07b49462ad76620678 b/sql/hive/src/test/resources/golden/Test UDTF.close in Lateral Views-0-ac5c96224a534f07b49462ad76620678 new file mode 100644 index 0000000000000..946e72fc87c2e --- /dev/null +++ b/sql/hive/src/test/resources/golden/Test UDTF.close in Lateral Views-0-ac5c96224a534f07b49462ad76620678 @@ -0,0 +1,2 @@ +97 500 +97 500 diff --git a/sql/hive/src/test/resources/golden/Test UDTF.close in SELECT-0-517f834fef35b896ec64399f42b2a151 b/sql/hive/src/test/resources/golden/Test UDTF.close in SELECT-0-517f834fef35b896ec64399f42b2a151 new file mode 100644 index 0000000000000..a5c8806279fa7 --- /dev/null +++ b/sql/hive/src/test/resources/golden/Test UDTF.close in SELECT-0-517f834fef35b896ec64399f42b2a151 @@ -0,0 +1,2 @@ +3 +3 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 2c9c08a9f3898..089a57e25c08d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -20,6 +20,9 @@ package org.apache.spark.sql.hive.execution import java.io.File import java.util.{Locale, TimeZone} +import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory +import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspectorFactory, StructObjectInspector, ObjectInspector} import org.scalatest.BeforeAndAfter import scala.util.Try @@ 
-51,14 +54,32 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) // Add Locale setting Locale.setDefault(Locale.US) + sql(s"ADD JAR ${TestHive.getHiveFile("TestUDTF.jar").getCanonicalPath()}") + // The function source code can be found at: + // https://cwiki.apache.org/confluence/display/Hive/DeveloperGuide+UDTF + sql( + """ + |CREATE TEMPORARY FUNCTION udtf_count2 + |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' + """.stripMargin) } override def afterAll() { TestHive.cacheTables = false TimeZone.setDefault(originalTimeZone) Locale.setDefault(originalLocale) + sql("DROP TEMPORARY FUNCTION udtf_count2") } + createQueryTest("Test UDTF.close in Lateral Views", + """ + |SELECT key, cc + |FROM src LATERAL VIEW udtf_count2(value) dd AS cc + """.stripMargin, false) // false mean we have to keep the temp function in registry + + createQueryTest("Test UDTF.close in SELECT", + "SELECT udtf_count2(a) FROM (SELECT 1 AS a FROM src LIMIT 3) table", false) + test("SPARK-4908: concurrent hive native commands") { (1 to 100).par.map { _ => sql("USE default") From bec938f777a2e18757c7d04504d86a5342e2b49e Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Wed, 13 May 2015 10:01:26 -0700 Subject: [PATCH 141/320] [SPARK-7589] [STREAMING] [WEBUI] Make "Input Rate" in the Streaming page consistent with other pages This PR makes "Input Rate" in the Streaming page consistent with Job and Stage pages. ![screen shot 2015-05-12 at 5 03 35 pm](https://cloud.githubusercontent.com/assets/1000778/7601444/f943f8ac-f8ca-11e4-8280-a715d814f434.png) ![screen shot 2015-05-12 at 5 07 25 pm](https://cloud.githubusercontent.com/assets/1000778/7601445/f9571c0c-f8ca-11e4-9b12-9317cb55c002.png) Author: zsxwing <zsxwing@gmail.com> Closes #6102 from zsxwing/SPARK-7589 and squashes the following commits: 2745225 [zsxwing] Make "Input Rate" in the Streaming page consistent with other pages --- .../apache/spark/ui/static/streaming-page.css | 4 +++ .../apache/spark/ui/static/streaming-page.js | 18 ++++++++++--- .../spark/streaming/ui/StreamingPage.scala | 27 +++++++++---------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.css b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.css index 5da9d631ad124..19abe889ad3c1 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.css +++ b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.css @@ -56,3 +56,7 @@ .histogram { width: auto; } + +span.expand-input-rate { + cursor: pointer; +} diff --git a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js index a4e03b156f13e..22b186873e990 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js +++ b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js @@ -266,9 +266,19 @@ $(function() { } } - if (getParameterFromURL("show-streams-detail") == "true") { - // Show the details for all InputDStream - $('#inputs-table').toggle('collapsed'); - $('#triangle').html('▼'); + var status = getParameterFromURL("show-streams-detail") == "true"; + + $("span.expand-input-rate").click(function() { + status = !status; + $("#inputs-table").toggle('collapsed'); + // Toggle the class of the arrow between open and closed + 
$(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed'); + window.history.pushState('', document.title, window.location.pathname + '?show-streams-detail=' + status); + }); + + if (status) { + $("#inputs-table").toggle('collapsed'); + // Toggle the class of the arrow between open and closed + $(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed'); } }); diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala index ff0f2b18dc321..efce8c58fb962 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala @@ -244,17 +244,6 @@ private[ui] class StreamingPage(parent: StreamingTab) val maxEventRate = eventRateForAllStreams.max.map(_.ceil.toLong).getOrElse(0L) val minEventRate = 0L - // JavaScript to show/hide the InputDStreams sub table. - val triangleJs = - s"""$$('#inputs-table').toggle('collapsed'); - |var status = false; - |if ($$(this).html() == '$BLACK_RIGHT_TRIANGLE_HTML') { - |$$(this).html('$BLACK_DOWN_TRIANGLE_HTML');status = true;} - |else {$$(this).html('$BLACK_RIGHT_TRIANGLE_HTML');status = false;} - |window.history.pushState('', - | document.title, window.location.pathname + '?show-streams-detail=' + status);""" - .stripMargin.replaceAll("\\n", "") // it must be only one single line - val batchInterval = UIUtils.convertToTimeUnit(listener.batchDuration, normalizedUnit) val jsCollector = new JsCollector @@ -326,10 +315,18 @@ private[ui] class StreamingPage(parent: StreamingTab) <td style="vertical-align: middle;"> <div style="width: 160px;"> <div> - {if (hasStream) { - <span id="triangle" onclick={Unparsed(triangleJs)}>{Unparsed(BLACK_RIGHT_TRIANGLE_HTML)}</span> - }} - <strong>Input Rate</strong> + { + if (hasStream) { + <span class="expand-input-rate"> + <span class="expand-input-rate-arrow arrow-closed"></span> + <a data-toggle="tooltip" title="Show/hide details of each receiver" data-placement="right"> + <strong>Input Rate</strong> + </a> + </span> + } else { + <strong>Input Rate</strong> + } + } </div> <div>Avg: {eventRateForAllStreams.formattedAvg} events/sec</div> </div> From 7ff16e8abef9fbf4a4855e23c256b22e62e560a6 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Wed, 13 May 2015 11:04:10 -0700 Subject: [PATCH 142/320] [SPARK-7567] [SQL] Migrating Parquet data source to FSBasedRelation This PR migrates Parquet data source to the newly introduced `FSBasedRelation`. `FSBasedParquetRelation` is created to replace `ParquetRelation2`. Major differences are: 1. Partition discovery code has been factored out to `FSBasedRelation` 1. `AppendingParquetOutputFormat` is not used now. Instead, an anonymous subclass of `ParquetOutputFormat` is used to handle appending and writing dynamic partitions 1. When scanning partitioned tables, `FSBasedParquetRelation.buildScan` only builds an `RDD[Row]` for a single selected partition 1. `FSBasedParquetRelation` doesn't rely on Catalyst expressions for filter push down, thus it doesn't extend `CatalystScan` anymore After migrating `JSONRelation` (which extends `CatalystScan`), we can remove `CatalystScan`. 
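For a rough picture of point 4 above (a hedged sketch with made-up column names, not code from
this patch): instead of walking Catalyst expressions, the relation now receives data source
filters such as `sources.GreaterThan("key", 10)` and maps them onto Parquet's `FilterApi`,
roughly as follows. The actual mapping is `ParquetFilters.createFilter(schema, filter)`,
introduced in the diff below.

    import parquet.filter2.predicate.FilterApi.{and, binaryColumn, gt, intColumn, notEq}
    import parquet.io.api.Binary

    // sources.And(GreaterThan("key", 10), Not(EqualTo("value", "a"))) becomes approximately:
    val predicate = and(
      gt(intColumn("key"), Int.box(10)),
      notEq(binaryColumn("value"), Binary.fromByteArray("a".getBytes("UTF-8"))))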
<!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/6090) <!-- Reviewable:end --> Author: Cheng Lian <lian@databricks.com> Closes #6090 from liancheng/parquet-migration and squashes the following commits: 6063f87 [Cheng Lian] Casts to OutputCommitter rather than FileOutputCommtter bfd1cf0 [Cheng Lian] Fixes compilation error introduced while rebasing f9ea56e [Cheng Lian] Adds ParquetRelation2 related classes to MiMa check whitelist 261d8c1 [Cheng Lian] Minor bug fix and more tests db65660 [Cheng Lian] Migrates Parquet data source to FSBasedRelation --- project/MimaExcludes.scala | 6 + .../org/apache/spark/sql/SQLContext.scala | 8 +- .../spark/sql/parquet/ParquetFilters.scala | 278 +++--- .../sql/parquet/ParquetTableOperations.scala | 2 +- .../spark/sql/parquet/fsBasedParquet.scala | 565 ++++++++++++ .../apache/spark/sql/parquet/newParquet.scala | 840 ------------------ .../apache/spark/sql/sources/commands.scala | 19 +- .../sql/parquet/ParquetFilterSuite.scala | 6 +- .../spark/sql/parquet/ParquetIOSuite.scala | 6 +- .../ParquetPartitionDiscoverySuite.scala | 10 +- .../sql/parquet/ParquetSchemaSuite.scala | 12 +- .../spark/sql/hive/HiveMetastoreCatalog.scala | 25 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 15 +- .../sql/hive/execution/SQLQuerySuite.scala | 16 +- .../apache/spark/sql/hive/parquetSuites.scala | 35 +- ...uite.scala => fsBasedRelationSuites.scala} | 173 ++-- 16 files changed, 926 insertions(+), 1090 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala delete mode 100644 sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala rename sql/hive/src/test/scala/org/apache/spark/sql/sources/{FSBasedRelationSuite.scala => fsBasedRelationSuites.scala} (83%) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index a47e29e2ef365..f31f0e554eee9 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -111,6 +111,12 @@ object MimaExcludes { "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache"), // These test support classes were moved out of src/main and into src/test: ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.ParquetTestData"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 975498c11fa23..0a148c7cd2d3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -27,9 +27,11 @@ import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal import com.google.common.reflect.TypeToken +import org.apache.hadoop.fs.Path import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ @@ 
-42,6 +44,7 @@ import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, e import org.apache.spark.sql.execution.{Filter, _} import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} import org.apache.spark.sql.json._ +import org.apache.spark.sql.parquet.FSBasedParquetRelation import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -641,7 +644,10 @@ class SQLContext(@transient val sparkContext: SparkContext) if (paths.isEmpty) { emptyDataFrame } else if (conf.parquetUseDataSourceApi) { - baseRelationToDataFrame(parquet.ParquetRelation2(paths, Map.empty)(this)) + val globbedPaths = paths.map(new Path(_)).flatMap(SparkHadoopUtil.get.globPath).toArray + baseRelationToDataFrame( + new FSBasedParquetRelation( + globbedPaths.map(_.toString), None, None, Map.empty[String, String])(this)) } else { DataFrame(this, parquet.ParquetRelation( paths.mkString(","), Some(sparkContext.hadoopConfiguration), this)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala index 5eb1c6abc2432..f0f4e7d147e75 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala @@ -29,128 +29,184 @@ import parquet.io.api.Binary import org.apache.spark.SparkEnv import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.sources import org.apache.spark.sql.types._ private[sql] object ParquetFilters { val PARQUET_FILTER_DATA = "org.apache.spark.sql.parquet.row.filter" def createRecordFilter(filterExpressions: Seq[Expression]): Option[Filter] = { - filterExpressions.flatMap(createFilter).reduceOption(FilterApi.and).map(FilterCompat.get) + filterExpressions.flatMap { filter => + createFilter(filter) + }.reduceOption(FilterApi.and).map(FilterCompat.get) } - def createFilter(predicate: Expression): Option[FilterPredicate] = { - val makeEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case BooleanType => - (n: String, v: Any) => FilterApi.eq(booleanColumn(n), v.asInstanceOf[java.lang.Boolean]) - case IntegerType => - (n: String, v: Any) => FilterApi.eq(intColumn(n), v.asInstanceOf[Integer]) - case LongType => - (n: String, v: Any) => FilterApi.eq(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.eq(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.eq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - - // Binary.fromString and Binary.fromByteArray don't accept null values - case StringType => - (n: String, v: Any) => FilterApi.eq( - binaryColumn(n), - Option(v).map(s => Binary.fromByteArray(s.asInstanceOf[UTF8String].getBytes)).orNull) - case BinaryType => - (n: String, v: Any) => FilterApi.eq( - binaryColumn(n), - Option(v).map(b => Binary.fromByteArray(v.asInstanceOf[Array[Byte]])).orNull) - } + private val makeEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case BooleanType => + (n: String, v: Any) => FilterApi.eq(booleanColumn(n), v.asInstanceOf[java.lang.Boolean]) + case IntegerType => + (n: String, v: Any) => FilterApi.eq(intColumn(n), v.asInstanceOf[Integer]) + case LongType => + (n: String, v: Any) => FilterApi.eq(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.eq(floatColumn(n), 
v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.eq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - val makeNotEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case BooleanType => - (n: String, v: Any) => FilterApi.notEq(booleanColumn(n), v.asInstanceOf[java.lang.Boolean]) - case IntegerType => - (n: String, v: Any) => FilterApi.notEq(intColumn(n), v.asInstanceOf[Integer]) - case LongType => - (n: String, v: Any) => FilterApi.notEq(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.notEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.notEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => FilterApi.notEq( - binaryColumn(n), - Option(v).map(s => Binary.fromByteArray(s.asInstanceOf[UTF8String].getBytes)).orNull) - case BinaryType => - (n: String, v: Any) => FilterApi.notEq( - binaryColumn(n), - Option(v).map(b => Binary.fromByteArray(v.asInstanceOf[Array[Byte]])).orNull) - } + // Binary.fromString and Binary.fromByteArray don't accept null values + case StringType => + (n: String, v: Any) => FilterApi.eq( + binaryColumn(n), + Option(v).map(s => Binary.fromByteArray(s.asInstanceOf[UTF8String].getBytes)).orNull) + case BinaryType => + (n: String, v: Any) => FilterApi.eq( + binaryColumn(n), + Option(v).map(b => Binary.fromByteArray(v.asInstanceOf[Array[Byte]])).orNull) + } - val makeLt: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case IntegerType => - (n: String, v: Any) => FilterApi.lt(intColumn(n), v.asInstanceOf[Integer]) - case LongType => - (n: String, v: Any) => FilterApi.lt(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.lt(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.lt(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => - FilterApi.lt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) - case BinaryType => - (n: String, v: Any) => - FilterApi.lt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) - } + private val makeNotEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case BooleanType => + (n: String, v: Any) => FilterApi.notEq(booleanColumn(n), v.asInstanceOf[java.lang.Boolean]) + case IntegerType => + (n: String, v: Any) => FilterApi.notEq(intColumn(n), v.asInstanceOf[Integer]) + case LongType => + (n: String, v: Any) => FilterApi.notEq(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.notEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.notEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case StringType => + (n: String, v: Any) => FilterApi.notEq( + binaryColumn(n), + Option(v).map(s => Binary.fromByteArray(s.asInstanceOf[UTF8String].getBytes)).orNull) + case BinaryType => + (n: String, v: Any) => FilterApi.notEq( + binaryColumn(n), + Option(v).map(b => Binary.fromByteArray(v.asInstanceOf[Array[Byte]])).orNull) + } - val makeLtEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case IntegerType => - (n: String, v: Any) => FilterApi.ltEq(intColumn(n), v.asInstanceOf[java.lang.Integer]) - case LongType => - (n: String, v: Any) => FilterApi.ltEq(longColumn(n), v.asInstanceOf[java.lang.Long]) - 
case FloatType => - (n: String, v: Any) => FilterApi.ltEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.ltEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => - FilterApi.ltEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) - case BinaryType => - (n: String, v: Any) => - FilterApi.ltEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) - } + private val makeLt: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case IntegerType => + (n: String, v: Any) => FilterApi.lt(intColumn(n), v.asInstanceOf[Integer]) + case LongType => + (n: String, v: Any) => FilterApi.lt(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.lt(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.lt(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case StringType => + (n: String, v: Any) => + FilterApi.lt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) + case BinaryType => + (n: String, v: Any) => + FilterApi.lt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + } - val makeGt: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case IntegerType => - (n: String, v: Any) => FilterApi.gt(intColumn(n), v.asInstanceOf[java.lang.Integer]) - case LongType => - (n: String, v: Any) => FilterApi.gt(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.gt(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.gt(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => - FilterApi.gt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) - case BinaryType => - (n: String, v: Any) => - FilterApi.gt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) - } + private val makeLtEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case IntegerType => + (n: String, v: Any) => FilterApi.ltEq(intColumn(n), v.asInstanceOf[java.lang.Integer]) + case LongType => + (n: String, v: Any) => FilterApi.ltEq(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.ltEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.ltEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case StringType => + (n: String, v: Any) => + FilterApi.ltEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) + case BinaryType => + (n: String, v: Any) => + FilterApi.ltEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + } - val makeGtEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case IntegerType => - (n: String, v: Any) => FilterApi.gtEq(intColumn(n), v.asInstanceOf[java.lang.Integer]) - case LongType => - (n: String, v: Any) => FilterApi.gtEq(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.gtEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.gtEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => - FilterApi.gtEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) - case BinaryType => - (n: String, v: Any) => - 
FilterApi.gtEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + private val makeGt: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case IntegerType => + (n: String, v: Any) => FilterApi.gt(intColumn(n), v.asInstanceOf[java.lang.Integer]) + case LongType => + (n: String, v: Any) => FilterApi.gt(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.gt(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.gt(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case StringType => + (n: String, v: Any) => + FilterApi.gt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) + case BinaryType => + (n: String, v: Any) => + FilterApi.gt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + } + + private val makeGtEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case IntegerType => + (n: String, v: Any) => FilterApi.gtEq(intColumn(n), v.asInstanceOf[java.lang.Integer]) + case LongType => + (n: String, v: Any) => FilterApi.gtEq(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.gtEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.gtEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case StringType => + (n: String, v: Any) => + FilterApi.gtEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) + case BinaryType => + (n: String, v: Any) => + FilterApi.gtEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + } + + /** + * Converts data sources filters to Parquet filter predicates. + */ + def createFilter(schema: StructType, predicate: sources.Filter): Option[FilterPredicate] = { + val dataTypeOf = schema.map(f => f.name -> f.dataType).toMap + + // NOTE: + // + // For any comparison operator `cmp`, both `a cmp NULL` and `NULL cmp a` evaluate to `NULL`, + // which can be casted to `false` implicitly. Please refer to the `eval` method of these + // operators and the `SimplifyFilters` rule for details. + predicate match { + case sources.IsNull(name) => + makeEq.lift(dataTypeOf(name)).map(_(name, null)) + case sources.IsNotNull(name) => + makeNotEq.lift(dataTypeOf(name)).map(_(name, null)) + + case sources.EqualTo(name, value) => + makeEq.lift(dataTypeOf(name)).map(_(name, value)) + case sources.Not(sources.EqualTo(name, value)) => + makeNotEq.lift(dataTypeOf(name)).map(_(name, value)) + + case sources.LessThan(name, value) => + makeLt.lift(dataTypeOf(name)).map(_(name, value)) + case sources.LessThanOrEqual(name, value) => + makeLtEq.lift(dataTypeOf(name)).map(_(name, value)) + + case sources.GreaterThan(name, value) => + makeGt.lift(dataTypeOf(name)).map(_(name, value)) + case sources.GreaterThanOrEqual(name, value) => + makeGtEq.lift(dataTypeOf(name)).map(_(name, value)) + + case sources.And(lhs, rhs) => + (createFilter(schema, lhs) ++ createFilter(schema, rhs)).reduceOption(FilterApi.and) + + case sources.Or(lhs, rhs) => + for { + lhsFilter <- createFilter(schema, lhs) + rhsFilter <- createFilter(schema, rhs) + } yield FilterApi.or(lhsFilter, rhsFilter) + + case sources.Not(pred) => + createFilter(schema, pred).map(FilterApi.not) + + case _ => None } + } + /** + * Converts Catalyst predicate expressions to Parquet filter predicates. + * + * @todo This can be removed once we get rid of the old Parquet support. 
+ */ + def createFilter(predicate: Expression): Option[FilterPredicate] = { // NOTE: // // For any comparison operator `cmp`, both `a cmp NULL` and `NULL cmp a` evaluate to `NULL`, @@ -170,7 +226,7 @@ private[sql] object ParquetFilters { makeEq.lift(dataType).map(_(name, value)) case EqualTo(NonNullLiteral(value, _), Cast(NamedExpression(name, _), dataType)) => makeEq.lift(dataType).map(_(name, value)) - + case Not(EqualTo(NamedExpression(name, _), NonNullLiteral(value, dataType))) => makeNotEq.lift(dataType).map(_(name, value)) case Not(EqualTo(Cast(NamedExpression(name, _), dataType), NonNullLiteral(value, _))) => @@ -192,7 +248,7 @@ private[sql] object ParquetFilters { case LessThanOrEqual(NamedExpression(name, _), NonNullLiteral(value, dataType)) => makeLtEq.lift(dataType).map(_(name, value)) case LessThanOrEqual(Cast(NamedExpression(name, _), dataType), NonNullLiteral(value, _)) => - makeLtEq.lift(dataType).map(_(name, value)) + makeLtEq.lift(dataType).map(_(name, value)) case LessThanOrEqual(NonNullLiteral(value, dataType), NamedExpression(name, _)) => makeGtEq.lift(dataType).map(_(name, value)) case LessThanOrEqual(NonNullLiteral(value, _), Cast(NamedExpression(name, _), dataType)) => @@ -201,7 +257,7 @@ private[sql] object ParquetFilters { case GreaterThan(NamedExpression(name, _), NonNullLiteral(value, dataType)) => makeGt.lift(dataType).map(_(name, value)) case GreaterThan(Cast(NamedExpression(name, _), dataType), NonNullLiteral(value, _)) => - makeGt.lift(dataType).map(_(name, value)) + makeGt.lift(dataType).map(_(name, value)) case GreaterThan(NonNullLiteral(value, dataType), NamedExpression(name, _)) => makeLt.lift(dataType).map(_(name, value)) case GreaterThan(NonNullLiteral(value, _), Cast(NamedExpression(name, _), dataType)) => @@ -210,7 +266,7 @@ private[sql] object ParquetFilters { case GreaterThanOrEqual(NamedExpression(name, _), NonNullLiteral(value, dataType)) => makeGtEq.lift(dataType).map(_(name, value)) case GreaterThanOrEqual(Cast(NamedExpression(name, _), dataType), NonNullLiteral(value, _)) => - makeGtEq.lift(dataType).map(_(name, value)) + makeGtEq.lift(dataType).map(_(name, value)) case GreaterThanOrEqual(NonNullLiteral(value, dataType), NamedExpression(name, _)) => makeLtEq.lift(dataType).map(_(name, value)) case GreaterThanOrEqual(NonNullLiteral(value, _), Cast(NamedExpression(name, _), dataType)) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala index 75ac52d4a98ff..90950f924a054 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala @@ -674,7 +674,7 @@ private[parquet] object FileSystemHelper { def findMaxTaskId(pathStr: String, conf: Configuration): Int = { val files = FileSystemHelper.listFiles(pathStr, conf) // filename pattern is part-r-<int>.parquet - val nameP = new scala.util.matching.Regex("""part-r-(\d{1,}).parquet""", "taskid") + val nameP = new scala.util.matching.Regex("""part-.-(\d{1,}).*""", "taskid") val hiddenFileP = new scala.util.matching.Regex("_.*") files.map(_.getName).map { case nameP(taskid) => taskid.toInt diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala new file mode 100644 index 0000000000000..d810d6a028c58 --- /dev/null +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala @@ -0,0 +1,565 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parquet + +import java.util.{List => JList} + +import scala.collection.JavaConversions._ +import scala.util.Try + +import com.google.common.base.Objects +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.hadoop.io.Writable +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import parquet.filter2.predicate.FilterApi +import parquet.format.converter.ParquetMetadataConverter +import parquet.hadoop._ +import parquet.hadoop.metadata.CompressionCodecName +import parquet.hadoop.util.ContextUtil + +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.rdd.RDD._ +import org.apache.spark.rdd.{NewHadoopPartition, NewHadoopRDD, RDD} +import org.apache.spark.sql.sources._ +import org.apache.spark.sql.types.{DataType, StructType} +import org.apache.spark.sql.{Row, SQLConf, SQLContext} +import org.apache.spark.{Logging, Partition => SparkPartition, SparkException} + +private[sql] class DefaultSource extends FSBasedRelationProvider { + override def createRelation( + sqlContext: SQLContext, + paths: Array[String], + schema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): FSBasedRelation = { + val partitionSpec = partitionColumns.map(PartitionSpec(_, Seq.empty)) + new FSBasedParquetRelation(paths, schema, partitionSpec, parameters)(sqlContext) + } +} + +// NOTE: This class is instantiated and used on executor side only, no need to be serializable. +private[sql] class ParquetOutputWriter extends OutputWriter { + private var recordWriter: RecordWriter[Void, Row] = _ + private var taskAttemptContext: TaskAttemptContext = _ + + override def init( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): Unit = { + val conf = context.getConfiguration + val outputFormat = { + // When appending new Parquet files to an existing Parquet file directory, to avoid + // overwriting existing data files, we need to find out the max task ID encoded in these data + // file names. + // TODO Make this snippet a utility function for other data source developers + val maxExistingTaskId = { + // Note that `path` may point to a temporary location. 
Here we retrieve the real + // destination path from the configuration + val outputPath = new Path(conf.get("spark.sql.sources.output.path")) + val fs = outputPath.getFileSystem(conf) + + if (fs.exists(outputPath)) { + // Pattern used to match task ID in part file names, e.g.: + // + // part-r-00001.gz.part + // ^~~~~ + val partFilePattern = """part-.-(\d{1,}).*""".r + + fs.listStatus(outputPath).map(_.getPath.getName).map { + case partFilePattern(id) => id.toInt + case name if name.startsWith("_") => 0 + case name if name.startsWith(".") => 0 + case name => sys.error( + s"""Trying to write Parquet files to directory $outputPath, + |but found items with illegal name "$name" + """.stripMargin.replace('\n', ' ').trim) + }.reduceOption(_ max _).getOrElse(0) + } else { + 0 + } + } + + new ParquetOutputFormat[Row]() { + // Here we override `getDefaultWorkFile` for two reasons: + // + // 1. To allow appending. We need to generate output file name based on the max available + // task ID computed above. + // + // 2. To allow dynamic partitioning. Default `getDefaultWorkFile` uses + // `FileOutputCommitter.getWorkPath()`, which points to the base directory of all + // partitions in the case of dynamic partitioning. + override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { + val split = context.getTaskAttemptID.getTaskID.getId + maxExistingTaskId + 1 + new Path(path, f"part-r-$split%05d$extension") + } + } + } + + recordWriter = outputFormat.getRecordWriter(context) + taskAttemptContext = context + } + + override def write(row: Row): Unit = recordWriter.write(null, row) + + override def close(): Unit = recordWriter.close(taskAttemptContext) +} + +private[sql] class FSBasedParquetRelation( + paths: Array[String], + private val maybeDataSchema: Option[StructType], + private val maybePartitionSpec: Option[PartitionSpec], + parameters: Map[String, String])( + val sqlContext: SQLContext) + extends FSBasedRelation(paths, maybePartitionSpec) + with Logging { + + // Should we merge schemas from all Parquet part-files? + private val shouldMergeSchemas = + parameters.getOrElse(FSBasedParquetRelation.MERGE_SCHEMA, "true").toBoolean + + private val maybeMetastoreSchema = parameters + .get(FSBasedParquetRelation.METASTORE_SCHEMA) + .map(DataType.fromJson(_).asInstanceOf[StructType]) + + private val metadataCache = new MetadataCache + metadataCache.refresh() + + override def equals(other: scala.Any): Boolean = other match { + case that: FSBasedParquetRelation => + val schemaEquality = if (shouldMergeSchemas) { + this.shouldMergeSchemas == that.shouldMergeSchemas + } else { + this.dataSchema == that.dataSchema && + this.schema == that.schema + } + + this.paths.toSet == that.paths.toSet && + schemaEquality && + this.maybeDataSchema == that.maybeDataSchema && + this.partitionColumns == that.partitionColumns + + case _ => false + } + + override def hashCode(): Int = { + if (shouldMergeSchemas) { + Objects.hashCode( + Boolean.box(shouldMergeSchemas), + paths.toSet, + maybeDataSchema, + maybePartitionSpec) + } else { + Objects.hashCode( + Boolean.box(shouldMergeSchemas), + paths.toSet, + dataSchema, + schema, + maybeDataSchema, + maybePartitionSpec) + } + } + + override def outputWriterClass: Class[_ <: OutputWriter] = classOf[ParquetOutputWriter] + + override def dataSchema: StructType = metadataCache.dataSchema + + override private[sql] def refresh(): Unit = { + metadataCache.refresh() + super.refresh() + } + + // Parquet data source always uses Catalyst internal representations. 
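
The appending path above picks the next part-file number by scanning the existing output directory and taking the largest task ID embedded in a `part-*` file name. A standalone sketch of just that name-parsing step, reusing the same `part-.-(\d{1,}).*` pattern (the file names below are invented):

object MaxTaskIdSketch {
  // Same pattern as in the patch: matches e.g. part-r-00001.gz.parquet, capturing "00001".
  private val partFilePattern = """part-.-(\d{1,}).*""".r

  // Ignore _SUCCESS/_metadata style files and hidden files; fail fast on anything else,
  // since unknown entries in the output directory usually indicate a misconfigured path.
  def maxExistingTaskId(fileNames: Seq[String]): Int =
    fileNames.map {
      case partFilePattern(id)          => id.toInt
      case name if name.startsWith("_") => 0
      case name if name.startsWith(".") => 0
      case name =>
        sys.error(s"""Found item with illegal name "$name" in the output directory""")
    }.reduceOption(_ max _).getOrElse(0)

  def main(args: Array[String]): Unit = {
    val existing = Seq("part-r-00001.gz.parquet", "part-r-00007.gz.parquet", "_SUCCESS")
    println(maxExistingTaskId(existing)) // 7
    // Newly written part files would then start from maxExistingTaskId(existing) + 1.
  }
}
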
+ override val needConversion: Boolean = false + + override val sizeInBytes = metadataCache.dataStatuses.map(_.getLen).sum + + override def prepareForWrite(job: Job): Unit = { + val conf = ContextUtil.getConfiguration(job) + + val committerClass = + conf.getClass( + "spark.sql.parquet.output.committer.class", + classOf[ParquetOutputCommitter], + classOf[ParquetOutputCommitter]) + + conf.setClass( + "mapred.output.committer.class", + committerClass, + classOf[ParquetOutputCommitter]) + + // TODO There's no need to use two kinds of WriteSupport + // We should unify them. `SpecificMutableRow` can process both atomic (primitive) types and + // complex types. + val writeSupportClass = + if (dataSchema.map(_.dataType).forall(ParquetTypesConverter.isPrimitiveType)) { + classOf[MutableRowWriteSupport] + } else { + classOf[RowWriteSupport] + } + + ParquetOutputFormat.setWriteSupportClass(job, writeSupportClass) + RowWriteSupport.setSchema(dataSchema.toAttributes, conf) + + // Sets compression scheme + conf.set( + ParquetOutputFormat.COMPRESSION, + ParquetRelation + .shortParquetCompressionCodecNames + .getOrElse( + sqlContext.conf.parquetCompressionCodec.toUpperCase, + CompressionCodecName.UNCOMPRESSED).name()) + } + + override def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputPaths: Array[String]): RDD[Row] = { + + val job = Job.getInstance(SparkHadoopUtil.get.conf) + val conf = ContextUtil.getConfiguration(job) + + ParquetInputFormat.setReadSupportClass(job, classOf[RowReadSupport]) + + if (inputPaths.nonEmpty) { + FileInputFormat.setInputPaths(job, inputPaths.map(new Path(_)): _*) + } + + // Try to push down filters when filter push-down is enabled. + if (sqlContext.conf.parquetFilterPushDown) { + filters + // Collects all converted Parquet filter predicates. Notice that not all predicates can be + // converted (`ParquetFilters.createFilter` returns an `Option`). That's why a `flatMap` + // is used here. + .flatMap(ParquetFilters.createFilter(dataSchema, _)) + .reduceOption(FilterApi.and) + .foreach(ParquetInputFormat.setFilterPredicate(conf, _)) + } + + conf.set(RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA, { + val requestedSchema = StructType(requiredColumns.map(dataSchema(_))) + ParquetTypesConverter.convertToString(requestedSchema.toAttributes) + }) + + conf.set( + RowWriteSupport.SPARK_ROW_SCHEMA, + ParquetTypesConverter.convertToString(dataSchema.toAttributes)) + + // Tell FilteringParquetRowInputFormat whether it's okay to cache Parquet and FS metadata + val useMetadataCache = sqlContext.getConf(SQLConf.PARQUET_CACHE_METADATA, "true").toBoolean + conf.set(SQLConf.PARQUET_CACHE_METADATA, useMetadataCache.toString) + + val inputFileStatuses = + metadataCache.dataStatuses.filter(f => inputPaths.contains(f.getPath.toString)) + + val footers = inputFileStatuses.map(metadataCache.footers) + + // TODO Stop using `FilteringParquetRowInputFormat` and overriding `getPartition`. + // After upgrading to Parquet 1.6.0, we should be able to stop caching `FileStatus` objects and + // footers. Especially when a global arbitrative schema (either from metastore or data source + // DDL) is available. 
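
`buildScan` above only pushes down the subset of data source filters that `ParquetFilters.createFilter` manages to translate, folding the survivors into one conjunction with `reduceOption`. The same shape in plain Scala, with a placeholder `convert` function standing in for the real conversion to Parquet `FilterPredicate`s:

object PushDownSketch {
  // Placeholder conversion: only filters on column "a" are "convertible" in this sketch.
  def convert(filter: String): Option[String] =
    if (filter.startsWith("a ")) Some(s"parquet[$filter]") else None

  def main(args: Array[String]): Unit = {
    val filters = Seq("a = 1", "b LIKE 'x%'", "a < 10")

    // flatMap drops the filters that cannot be converted; reduceOption ANDs the rest,
    // and yields None when nothing at all could be pushed down.
    val pushed: Option[String] =
      filters.flatMap(convert).reduceOption((l, r) => s"and($l, $r)")

    pushed.foreach(p => println(s"setFilterPredicate($p)"))
    // prints: setFilterPredicate(and(parquet[a = 1], parquet[a < 10]))
  }
}
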
+ new NewHadoopRDD( + sqlContext.sparkContext, + classOf[FilteringParquetRowInputFormat], + classOf[Void], + classOf[Row], + conf) { + + val cacheMetadata = useMetadataCache + + @transient val cachedStatuses = inputFileStatuses.map { f => + // In order to encode the authority of a Path containing special characters such as /, + // we need to use the string returned by the URI of the path to create a new Path. + val pathWithAuthority = new Path(f.getPath.toUri.toString) + + new FileStatus( + f.getLen, f.isDir, f.getReplication, f.getBlockSize, f.getModificationTime, + f.getAccessTime, f.getPermission, f.getOwner, f.getGroup, pathWithAuthority) + }.toSeq + + @transient val cachedFooters = footers.map { f => + // In order to encode the authority of a Path containing special characters such as /, + // we need to use the string returned by the URI of the path to create a new Path. + new Footer(new Path(f.getFile.toUri.toString), f.getParquetMetadata) + }.toSeq + + // Overridden so we can inject our own cached files statuses. + override def getPartitions: Array[SparkPartition] = { + val inputFormat = if (cacheMetadata) { + new FilteringParquetRowInputFormat { + override def listStatus(jobContext: JobContext): JList[FileStatus] = cachedStatuses + + override def getFooters(jobContext: JobContext): JList[Footer] = cachedFooters + } + } else { + new FilteringParquetRowInputFormat + } + + val jobContext = newJobContext(getConf, jobId) + val rawSplits = inputFormat.getSplits(jobContext) + + Array.tabulate[SparkPartition](rawSplits.size) { i => + new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) + } + } + }.values + } + + private class MetadataCache { + // `FileStatus` objects of all "_metadata" files. + private var metadataStatuses: Array[FileStatus] = _ + + // `FileStatus` objects of all "_common_metadata" files. + private var commonMetadataStatuses: Array[FileStatus] = _ + + // Parquet footer cache. + var footers: Map[FileStatus, Footer] = _ + + // `FileStatus` objects of all data files (Parquet part-files). + var dataStatuses: Array[FileStatus] = _ + + // Schema of the actual Parquet files, without partition columns discovered from partition + // directory paths. + var dataSchema: StructType = _ + + // Schema of the whole table, including partition columns. + var schema: StructType = _ + + /** + * Refreshes `FileStatus`es, footers, partition spec, and table schema. + */ + def refresh(): Unit = { + // Support either reading a collection of raw Parquet part-files, or a collection of folders + // containing Parquet files (e.g. partitioned Parquet table). + val baseStatuses = paths.distinct.flatMap { p => + val path = new Path(p) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + val qualified = path.makeQualified(fs.getUri, fs.getWorkingDirectory) + Try(fs.getFileStatus(qualified)).toOption + } + assert(baseStatuses.forall(!_.isDir) || baseStatuses.forall(_.isDir)) + + // Lists `FileStatus`es of all leaf nodes (files) under all base directories. 
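
When metadata caching is enabled, the scan above hands the driver-side `FileStatus`es and footers to the executors by overriding `listStatus`/`getFooters` in an anonymous input-format subclass. A toy illustration of that inject-through-an-anonymous-override shape (all names here are invented):

object CachedListingSketch {
  // Stand-in for an input format that normally lists the file system on every call.
  class Lister {
    def listStatus(dir: String): Seq[String] = {
      println(s"expensive listing of $dir") // pretend this hits a distributed file system
      Seq(s"$dir/part-r-00000.parquet", s"$dir/part-r-00001.parquet")
    }
  }

  def main(args: Array[String]): Unit = {
    val cacheMetadata = true
    val cachedStatuses = Seq("/data/part-r-00000.parquet", "/data/part-r-00001.parquet")

    // When caching is on, an anonymous subclass short-circuits the expensive call.
    val lister =
      if (cacheMetadata) new Lister { override def listStatus(dir: String) = cachedStatuses }
      else new Lister

    println(lister.listStatus("/data")) // served from the captured sequence, no real listing
  }
}
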
+ val leaves = baseStatuses.flatMap { f => + val fs = FileSystem.get(f.getPath.toUri, SparkHadoopUtil.get.conf) + SparkHadoopUtil.get.listLeafStatuses(fs, f.getPath).filter { f => + isSummaryFile(f.getPath) || + !(f.getPath.getName.startsWith("_") || f.getPath.getName.startsWith(".")) + } + } + + dataStatuses = leaves.filterNot(f => isSummaryFile(f.getPath)) + metadataStatuses = leaves.filter(_.getPath.getName == ParquetFileWriter.PARQUET_METADATA_FILE) + commonMetadataStatuses = + leaves.filter(_.getPath.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE) + + footers = (dataStatuses ++ metadataStatuses ++ commonMetadataStatuses).par.map { f => + val parquetMetadata = ParquetFileReader.readFooter( + SparkHadoopUtil.get.conf, f, ParquetMetadataConverter.NO_FILTER) + f -> new Footer(f.getPath, parquetMetadata) + }.seq.toMap + + dataSchema = { + val dataSchema0 = + maybeDataSchema + .orElse(readSchema()) + .orElse(maybeMetastoreSchema) + .getOrElse(sys.error("Failed to get the schema.")) + + // If this Parquet relation is converted from a Hive Metastore table, must reconcile case + // case insensitivity issue and possible schema mismatch (probably caused by schema + // evolution). + maybeMetastoreSchema + .map(FSBasedParquetRelation.mergeMetastoreParquetSchema(_, dataSchema0)) + .getOrElse(dataSchema0) + } + } + + private def isSummaryFile(file: Path): Boolean = { + file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE || + file.getName == ParquetFileWriter.PARQUET_METADATA_FILE + } + + private def readSchema(): Option[StructType] = { + // Sees which file(s) we need to touch in order to figure out the schema. + // + // Always tries the summary files first if users don't require a merged schema. In this case, + // "_common_metadata" is more preferable than "_metadata" because it doesn't contain row + // groups information, and could be much smaller for large Parquet files with lots of row + // groups. + // + // NOTE: Metadata stored in the summary files are merged from all part-files. However, for + // user defined key-value metadata (in which we store Spark SQL schema), Parquet doesn't know + // how to merge them correctly if some key is associated with different values in different + // part-files. When this happens, Parquet simply gives up generating the summary file. This + // implies that if a summary file presents, then: + // + // 1. Either all part-files have exactly the same Spark SQL schema, or + // 2. Some part-files don't contain Spark SQL schema in the key-value metadata at all (thus + // their schemas may differ from each other). + // + // Here we tend to be pessimistic and take the second case into account. Basically this means + // we can't trust the summary files if users require a merged schema, and must touch all part- + // files to do the merge. + val filesToTouch = + if (shouldMergeSchemas) { + // Also includes summary files, 'cause there might be empty partition directories. + (metadataStatuses ++ commonMetadataStatuses ++ dataStatuses).toSeq + } else { + // Tries any "_common_metadata" first. Parquet files written by old versions or Parquet + // don't have this. + commonMetadataStatuses.headOption + // Falls back to "_metadata" + .orElse(metadataStatuses.headOption) + // Summary file(s) not found, the Parquet file is either corrupted, or different part- + // files contain conflicting user defined metadata (two or more values are associated + // with a same key in different files). 
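
`MetadataCache.refresh()` above walks the leaf files once, keeps summary files and visible files, and then splits them into data part-files, `_metadata` and `_common_metadata` before reading footers in parallel. A simplified sketch of the classification step over plain file names (the real code works on Hadoop `FileStatus`es):

object LeafClassificationSketch {
  val ParquetMetadataFile = "_metadata"
  val ParquetCommonMetadataFile = "_common_metadata"

  def isSummaryFile(name: String): Boolean =
    name == ParquetMetadataFile || name == ParquetCommonMetadataFile

  def main(args: Array[String]): Unit = {
    val leaves = Seq(
      "part-r-00000.parquet", "part-r-00001.parquet",
      "_metadata", "_common_metadata", "_SUCCESS", ".hidden")

    // Keep summaries and visible files; drop every other hidden/underscore entry.
    val kept = leaves.filter(n => isSummaryFile(n) || !(n.startsWith("_") || n.startsWith(".")))

    val dataFiles = kept.filterNot(isSummaryFile)
    val metadataFiles = kept.filter(_ == ParquetMetadataFile)
    val commonMetadataFiles = kept.filter(_ == ParquetCommonMetadataFile)

    println(dataFiles)           // List(part-r-00000.parquet, part-r-00001.parquet)
    println(metadataFiles)       // List(_metadata)
    println(commonMetadataFiles) // List(_common_metadata)
  }
}
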
In either case, we fall back to any of the + // first part-file, and just assume all schemas are consistent. + .orElse(dataStatuses.headOption) + .toSeq + } + + assert( + filesToTouch.nonEmpty || maybeDataSchema.isDefined || maybeMetastoreSchema.isDefined, + "No schema defined, " + + s"and no Parquet data file or summary file found under ${paths.mkString(", ")}.") + + FSBasedParquetRelation.readSchema(filesToTouch.map(footers.apply), sqlContext) + } + } +} + +private[sql] object FSBasedParquetRelation extends Logging { + // Whether we should merge schemas collected from all Parquet part-files. + private[sql] val MERGE_SCHEMA = "mergeSchema" + + // Hive Metastore schema, used when converting Metastore Parquet tables. This option is only used + // internally. + private[sql] val METASTORE_SCHEMA = "metastoreSchema" + + private[parquet] def readSchema( + footers: Seq[Footer], sqlContext: SQLContext): Option[StructType] = { + footers.map { footer => + val metadata = footer.getParquetMetadata.getFileMetaData + val parquetSchema = metadata.getSchema + val maybeSparkSchema = metadata + .getKeyValueMetaData + .toMap + .get(RowReadSupport.SPARK_METADATA_KEY) + .flatMap { serializedSchema => + // Don't throw even if we failed to parse the serialized Spark schema. Just fallback to + // whatever is available. + Try(DataType.fromJson(serializedSchema)) + .recover { case _: Throwable => + logInfo( + s"Serialized Spark schema in Parquet key-value metadata is not in JSON format, " + + "falling back to the deprecated DataType.fromCaseClassString parser.") + DataType.fromCaseClassString(serializedSchema) + } + .recover { case cause: Throwable => + logWarning( + s"""Failed to parse serialized Spark schema in Parquet key-value metadata: + |\t$serializedSchema + """.stripMargin, + cause) + } + .map(_.asInstanceOf[StructType]) + .toOption + } + + maybeSparkSchema.getOrElse { + // Falls back to Parquet schema if Spark SQL schema is absent. + StructType.fromAttributes( + // TODO Really no need to use `Attribute` here, we only need to know the data type. + ParquetTypesConverter.convertToAttributes( + parquetSchema, + sqlContext.conf.isParquetBinaryAsString, + sqlContext.conf.isParquetINT96AsTimestamp)) + } + }.reduceOption { (left, right) => + try left.merge(right) catch { case e: Throwable => + throw new SparkException(s"Failed to merge incompatible schemas $left and $right", e) + } + } + } + + /** + * Reconciles Hive Metastore case insensitivity issue and data type conflicts between Metastore + * schema and Parquet schema. + * + * Hive doesn't retain case information, while Parquet is case sensitive. On the other hand, the + * schema read from Parquet files may be incomplete (e.g. older versions of Parquet doesn't + * distinguish binary and string). This method generates a correct schema by merging Metastore + * schema data types and Parquet schema field names. + */ + private[parquet] def mergeMetastoreParquetSchema( + metastoreSchema: StructType, + parquetSchema: StructType): StructType = { + def schemaConflictMessage: String = + s"""Converting Hive Metastore Parquet, but detected conflicting schemas. 
Metastore schema: + |${metastoreSchema.prettyJson} + | + |Parquet schema: + |${parquetSchema.prettyJson} + """.stripMargin + + val mergedParquetSchema = mergeMissingNullableFields(metastoreSchema, parquetSchema) + + assert(metastoreSchema.size <= mergedParquetSchema.size, schemaConflictMessage) + + val ordinalMap = metastoreSchema.zipWithIndex.map { + case (field, index) => field.name.toLowerCase -> index + }.toMap + + val reorderedParquetSchema = mergedParquetSchema.sortBy(f => + ordinalMap.getOrElse(f.name.toLowerCase, metastoreSchema.size + 1)) + + StructType(metastoreSchema.zip(reorderedParquetSchema).map { + // Uses Parquet field names but retains Metastore data types. + case (mSchema, pSchema) if mSchema.name.toLowerCase == pSchema.name.toLowerCase => + mSchema.copy(name = pSchema.name) + case _ => + throw new SparkException(schemaConflictMessage) + }) + } + + /** + * Returns the original schema from the Parquet file with any missing nullable fields from the + * Hive Metastore schema merged in. + * + * When constructing a DataFrame from a collection of structured data, the resulting object has + * a schema corresponding to the union of the fields present in each element of the collection. + * Spark SQL simply assigns a null value to any field that isn't present for a particular row. + * In some cases, it is possible that a given table partition stored as a Parquet file doesn't + * contain a particular nullable field in its schema despite that field being present in the + * table schema obtained from the Hive Metastore. This method returns a schema representing the + * Parquet file schema along with any additional nullable fields from the Metastore schema + * merged in. + */ + private[parquet] def mergeMissingNullableFields( + metastoreSchema: StructType, + parquetSchema: StructType): StructType = { + val fieldMap = metastoreSchema.map(f => f.name.toLowerCase -> f).toMap + val missingFields = metastoreSchema + .map(_.name.toLowerCase) + .diff(parquetSchema.map(_.name.toLowerCase)) + .map(fieldMap(_)) + .filter(_.nullable) + StructType(parquetSchema ++ missingFields) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala deleted file mode 100644 index ee4b1c72a2148..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ /dev/null @@ -1,840 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.sql.parquet - -import java.io.IOException -import java.lang.{Double => JDouble, Float => JFloat, Long => JLong} -import java.math.{BigDecimal => JBigDecimal} -import java.net.URI -import java.text.SimpleDateFormat -import java.util.{Date, List => JList} - -import scala.collection.JavaConversions._ -import scala.collection.mutable.ArrayBuffer -import scala.util.Try - -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} -import org.apache.hadoop.io.Writable -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -import org.apache.hadoop.mapreduce.{InputSplit, Job, JobContext} -import parquet.filter2.predicate.FilterApi -import parquet.format.converter.ParquetMetadataConverter -import parquet.hadoop.metadata.CompressionCodecName -import parquet.hadoop.util.ContextUtil -import parquet.hadoop.{ParquetInputFormat, _} - -import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.mapred.SparkHadoopMapRedUtil -import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil -import org.apache.spark.rdd.{NewHadoopPartition, NewHadoopRDD, RDD} -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, expressions} -import org.apache.spark.sql.parquet.ParquetTypesConverter._ -import org.apache.spark.sql.sources._ -import org.apache.spark.sql.types.{IntegerType, StructField, StructType, _} -import org.apache.spark.sql.{DataFrame, Row, SQLConf, SQLContext, SaveMode} -import org.apache.spark.{Logging, SerializableWritable, SparkException, TaskContext, Partition => SparkPartition} - -/** - * Allows creation of Parquet based tables using the syntax: - * {{{ - * CREATE TEMPORARY TABLE ... USING org.apache.spark.sql.parquet OPTIONS (...) - * }}} - * - * Supported options include: - * - * - `path`: Required. When reading Parquet files, `path` should point to the location of the - * Parquet file(s). It can be either a single raw Parquet file, or a directory of Parquet files. - * In the latter case, this data source tries to discover partitioning information if the the - * directory is structured in the same style of Hive partitioned tables. When writing Parquet - * file, `path` should point to the destination folder. - * - * - `mergeSchema`: Optional. Indicates whether we should merge potentially different (but - * compatible) schemas stored in all Parquet part-files. - * - * - `partition.defaultName`: Optional. Partition name used when a value of a partition column is - * null or empty string. This is similar to the `hive.exec.default.partition.name` configuration - * in Hive. - */ -private[sql] class DefaultSource - extends RelationProvider - with SchemaRelationProvider - with CreatableRelationProvider { - - private def checkPath(parameters: Map[String, String]): String = { - parameters.getOrElse("path", sys.error("'path' must be specified for parquet tables.")) - } - - /** Returns a new base relation with the given parameters. */ - override def createRelation( - sqlContext: SQLContext, - parameters: Map[String, String]): BaseRelation = { - ParquetRelation2(Seq(checkPath(parameters)), parameters, None)(sqlContext) - } - - /** Returns a new base relation with the given parameters and schema. 
*/ - override def createRelation( - sqlContext: SQLContext, - parameters: Map[String, String], - schema: StructType): BaseRelation = { - ParquetRelation2(Seq(checkPath(parameters)), parameters, Some(schema))(sqlContext) - } - - /** Returns a new base relation with the given parameters and save given data into it. */ - override def createRelation( - sqlContext: SQLContext, - mode: SaveMode, - parameters: Map[String, String], - data: DataFrame): BaseRelation = { - val path = checkPath(parameters) - val filesystemPath = new Path(path) - val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) - val doInsertion = (mode, fs.exists(filesystemPath)) match { - case (SaveMode.ErrorIfExists, true) => - sys.error(s"path $path already exists.") - case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => - true - case (SaveMode.Ignore, exists) => - !exists - } - - val relation = if (doInsertion) { - // This is a hack. We always set nullable/containsNull/valueContainsNull to true - // for the schema of a parquet data. - val df = - sqlContext.createDataFrame( - data.queryExecution.toRdd, - data.schema.asNullable, - needsConversion = false) - val createdRelation = - createRelation(sqlContext, parameters, df.schema).asInstanceOf[ParquetRelation2] - createdRelation.insert(df, overwrite = mode == SaveMode.Overwrite) - createdRelation - } else { - // If the save mode is Ignore, we will just create the relation based on existing data. - createRelation(sqlContext, parameters) - } - - relation - } -} - -/** - * An alternative to [[ParquetRelation]] that plugs in using the data sources API. This class is - * intended as a full replacement of the Parquet support in Spark SQL. The old implementation will - * be deprecated and eventually removed once this version is proved to be stable enough. - * - * Compared with the old implementation, this class has the following notable differences: - * - * - Partitioning discovery: Hive style multi-level partitions are auto discovered. - * - Metadata discovery: Parquet is a format comes with schema evolving support. This data source - * can detect and merge schemas from all Parquet part-files as long as they are compatible. - * Also, metadata and [[FileStatus]]es are cached for better performance. - * - Statistics: Statistics for the size of the table are automatically populated during schema - * discovery. - */ -@DeveloperApi -private[sql] case class ParquetRelation2( - paths: Seq[String], - parameters: Map[String, String], - maybeSchema: Option[StructType] = None, - maybePartitionSpec: Option[PartitionSpec] = None)( - @transient val sqlContext: SQLContext) - extends BaseRelation - with CatalystScan - with InsertableRelation - with SparkHadoopMapReduceUtil - with Logging { - - // Should we merge schemas from all Parquet part-files? 
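
The removed `createRelation` decides whether to write at all from the combination of save mode and destination existence. A small sketch of that decision table with a local stand-in for `SaveMode` (the real enum is `org.apache.spark.sql.SaveMode`):

object SaveModeSketch {
  sealed trait SaveMode
  case object Append extends SaveMode
  case object Overwrite extends SaveMode
  case object ErrorIfExists extends SaveMode
  case object Ignore extends SaveMode

  def shouldInsert(mode: SaveMode, pathExists: Boolean): Boolean = (mode, pathExists) match {
    case (ErrorIfExists, true) => sys.error("path already exists.")
    case (Append, _) | (Overwrite, _) | (ErrorIfExists, false) => true
    case (Ignore, exists) => !exists // silently keep whatever is already there
  }

  def main(args: Array[String]): Unit = {
    println(shouldInsert(Ignore, pathExists = true))    // false: leave existing data untouched
    println(shouldInsert(Overwrite, pathExists = true)) // true: existing data will be replaced
  }
}
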
- private val shouldMergeSchemas = - parameters.getOrElse(ParquetRelation2.MERGE_SCHEMA, "true").toBoolean - - // Optional Metastore schema, used when converting Hive Metastore Parquet table - private val maybeMetastoreSchema = - parameters - .get(ParquetRelation2.METASTORE_SCHEMA) - .map(s => DataType.fromJson(s).asInstanceOf[StructType]) - - // Hive uses this as part of the default partition name when the partition column value is null - // or empty string - private val defaultPartitionName = parameters.getOrElse( - ParquetRelation2.DEFAULT_PARTITION_NAME, "__HIVE_DEFAULT_PARTITION__") - - override def equals(other: Any): Boolean = other match { - case relation: ParquetRelation2 => - // If schema merging is required, we don't compare the actual schemas since they may evolve. - val schemaEquality = if (shouldMergeSchemas) { - shouldMergeSchemas == relation.shouldMergeSchemas - } else { - schema == relation.schema - } - - paths.toSet == relation.paths.toSet && - schemaEquality && - maybeMetastoreSchema == relation.maybeMetastoreSchema && - maybePartitionSpec == relation.maybePartitionSpec - - case _ => false - } - - override def hashCode(): Int = { - if (shouldMergeSchemas) { - com.google.common.base.Objects.hashCode( - shouldMergeSchemas: java.lang.Boolean, - paths.toSet, - maybeMetastoreSchema, - maybePartitionSpec) - } else { - com.google.common.base.Objects.hashCode( - shouldMergeSchemas: java.lang.Boolean, - schema, - paths.toSet, - maybeMetastoreSchema, - maybePartitionSpec) - } - } - - private[sql] def sparkContext = sqlContext.sparkContext - - private class MetadataCache { - // `FileStatus` objects of all "_metadata" files. - private var metadataStatuses: Array[FileStatus] = _ - - // `FileStatus` objects of all "_common_metadata" files. - private var commonMetadataStatuses: Array[FileStatus] = _ - - // Parquet footer cache. - var footers: Map[FileStatus, Footer] = _ - - // `FileStatus` objects of all data files (Parquet part-files). - var dataStatuses: Array[FileStatus] = _ - - // Partition spec of this table, including names, data types, and values of each partition - // column, and paths of each partition. - var partitionSpec: PartitionSpec = _ - - // Schema of the actual Parquet files, without partition columns discovered from partition - // directory paths. - var parquetSchema: StructType = _ - - // Schema of the whole table, including partition columns. - var schema: StructType = _ - - // Indicates whether partition columns are also included in Parquet data file schema. If not, - // we need to fill in partition column values into read rows when scanning the table. - var partitionKeysIncludedInParquetSchema: Boolean = _ - - def prepareMetadata(path: Path, schema: StructType, conf: Configuration): Unit = { - conf.set( - ParquetOutputFormat.COMPRESSION, - ParquetRelation - .shortParquetCompressionCodecNames - .getOrElse( - sqlContext.conf.parquetCompressionCodec.toUpperCase, - CompressionCodecName.UNCOMPRESSED).name()) - - ParquetRelation.enableLogForwarding() - ParquetTypesConverter.writeMetaData(schema.toAttributes, path, conf) - } - - /** - * Refreshes `FileStatus`es, footers, partition spec, and table schema. - */ - def refresh(): Unit = { - // Support either reading a collection of raw Parquet part-files, or a collection of folders - // containing Parquet files (e.g. partitioned Parquet table). 
- val baseStatuses = paths.distinct.map { p => - val fs = FileSystem.get(URI.create(p), sparkContext.hadoopConfiguration) - val path = new Path(p) - val qualified = path.makeQualified(fs.getUri, fs.getWorkingDirectory) - - if (!fs.exists(qualified) && maybeSchema.isDefined) { - fs.mkdirs(qualified) - prepareMetadata(qualified, maybeSchema.get, sparkContext.hadoopConfiguration) - } - - fs.getFileStatus(qualified) - }.toArray - assert(baseStatuses.forall(!_.isDir) || baseStatuses.forall(_.isDir)) - - // Lists `FileStatus`es of all leaf nodes (files) under all base directories. - val leaves = baseStatuses.flatMap { f => - val fs = FileSystem.get(f.getPath.toUri, sparkContext.hadoopConfiguration) - SparkHadoopUtil.get.listLeafStatuses(fs, f.getPath).filter { f => - isSummaryFile(f.getPath) || - !(f.getPath.getName.startsWith("_") || f.getPath.getName.startsWith(".")) - } - } - - dataStatuses = leaves.filterNot(f => isSummaryFile(f.getPath)) - metadataStatuses = leaves.filter(_.getPath.getName == ParquetFileWriter.PARQUET_METADATA_FILE) - commonMetadataStatuses = - leaves.filter(_.getPath.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE) - - footers = (dataStatuses ++ metadataStatuses ++ commonMetadataStatuses).par.map { f => - val parquetMetadata = ParquetFileReader.readFooter( - sparkContext.hadoopConfiguration, f, ParquetMetadataConverter.NO_FILTER) - f -> new Footer(f.getPath, parquetMetadata) - }.seq.toMap - - partitionSpec = maybePartitionSpec.getOrElse { - val partitionDirs = leaves - .filterNot(baseStatuses.contains) - .map(_.getPath.getParent) - .distinct - - if (partitionDirs.nonEmpty) { - // Parses names and values of partition columns, and infer their data types. - PartitioningUtils.parsePartitions(partitionDirs, defaultPartitionName) - } else { - // No partition directories found, makes an empty specification - PartitionSpec(StructType(Seq.empty[StructField]), Seq.empty[Partition]) - } - } - - // To get the schema. We first try to get the schema defined in maybeSchema. - // If maybeSchema is not defined, we will try to get the schema from existing parquet data - // (through readSchema). If data does not exist, we will try to get the schema defined in - // maybeMetastoreSchema (defined in the options of the data source). - // Finally, if we still could not get the schema. We throw an error. - parquetSchema = - maybeSchema - .orElse(readSchema()) - .orElse(maybeMetastoreSchema) - .getOrElse(sys.error("Failed to get the schema.")) - - partitionKeysIncludedInParquetSchema = - isPartitioned && - partitionColumns.forall(f => parquetSchema.fieldNames.contains(f.name)) - - schema = { - val fullRelationSchema = if (partitionKeysIncludedInParquetSchema) { - parquetSchema - } else { - StructType(parquetSchema.fields ++ partitionColumns.fields) - } - - // If this Parquet relation is converted from a Hive Metastore table, must reconcile case - // insensitivity issue and possible schema mismatch. - maybeMetastoreSchema - .map(ParquetRelation2.mergeMetastoreParquetSchema(_, fullRelationSchema)) - .getOrElse(fullRelationSchema) - } - } - - private def readSchema(): Option[StructType] = { - // Sees which file(s) we need to touch in order to figure out the schema. - val filesToTouch = - // Always tries the summary files first if users don't require a merged schema. In this case, - // "_common_metadata" is more preferable than "_metadata" because it doesn't contain row - // groups information, and could be much smaller for large Parquet files with lots of row - // groups. 
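
Partition discovery in the removed `refresh()` hands the non-base directories to `PartitioningUtils.parsePartitions`, which derives partition columns from Hive-style `column=value` path segments. A heavily simplified sketch of that idea (no type inference or cross-directory reconciliation, unlike the real utility):

object PartitionPathSketch {
  val DefaultPartitionName = "__HIVE_DEFAULT_PARTITION__"

  // Split a partition directory path into (column -> raw value) pairs, mapping the Hive
  // default-partition marker back to "no value".
  def parsePartitionPath(path: String): Seq[(String, Option[String])] =
    path.split("/").toList.filter(_.contains("=")).map { segment =>
      val Array(col, raw) = segment.split("=", 2)
      col -> (if (raw == DefaultPartitionName) None else Some(raw))
    }

  def main(args: Array[String]): Unit = {
    println(parsePartitionPath("hdfs://host/table/year=2015/month=__HIVE_DEFAULT_PARTITION__"))
    // List((year,Some(2015)), (month,None))
  }
}
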
- // - // NOTE: Metadata stored in the summary files are merged from all part-files. However, for - // user defined key-value metadata (in which we store Spark SQL schema), Parquet doesn't know - // how to merge them correctly if some key is associated with different values in different - // part-files. When this happens, Parquet simply gives up generating the summary file. This - // implies that if a summary file presents, then: - // - // 1. Either all part-files have exactly the same Spark SQL schema, or - // 2. Some part-files don't contain Spark SQL schema in the key-value metadata at all (thus - // their schemas may differ from each other). - // - // Here we tend to be pessimistic and take the second case into account. Basically this means - // we can't trust the summary files if users require a merged schema, and must touch all part- - // files to do the merge. - if (shouldMergeSchemas) { - // Also includes summary files, 'cause there might be empty partition directories. - (metadataStatuses ++ commonMetadataStatuses ++ dataStatuses).toSeq - } else { - // Tries any "_common_metadata" first. Parquet files written by old versions or Parquet - // don't have this. - commonMetadataStatuses.headOption - // Falls back to "_metadata" - .orElse(metadataStatuses.headOption) - // Summary file(s) not found, the Parquet file is either corrupted, or different part- - // files contain conflicting user defined metadata (two or more values are associated - // with a same key in different files). In either case, we fall back to any of the - // first part-file, and just assume all schemas are consistent. - .orElse(dataStatuses.headOption) - .toSeq - } - - ParquetRelation2.readSchema(filesToTouch.map(footers.apply), sqlContext) - } - } - - @transient private val metadataCache = new MetadataCache - metadataCache.refresh() - - def partitionSpec: PartitionSpec = metadataCache.partitionSpec - - def partitionColumns: StructType = metadataCache.partitionSpec.partitionColumns - - def partitions: Seq[Partition] = metadataCache.partitionSpec.partitions - - def isPartitioned: Boolean = partitionColumns.nonEmpty - - private def partitionKeysIncludedInDataSchema = metadataCache.partitionKeysIncludedInParquetSchema - - private def parquetSchema = metadataCache.parquetSchema - - override def schema: StructType = metadataCache.schema - - private def isSummaryFile(file: Path): Boolean = { - file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE || - file.getName == ParquetFileWriter.PARQUET_METADATA_FILE - } - - // Skip type conversion - override val needConversion: Boolean = false - - // TODO Should calculate per scan size - // It's common that a query only scans a fraction of a large Parquet file. Returning size of the - // whole Parquet file disables some optimizations in this case (e.g. broadcast join). - override val sizeInBytes = metadataCache.dataStatuses.map(_.getLen).sum - - // This is mostly a hack so that we can use the existing parquet filter code. 
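
The `filesToTouch` selection above (kept essentially unchanged in the new relation) prefers `_common_metadata`, then `_metadata`, then a single part-file when no merged schema is required, and touches every file otherwise. The fallback chain in isolation:

object SchemaFilesSketch {
  def filesToTouch(
      shouldMergeSchemas: Boolean,
      commonMetadata: Seq[String],
      metadata: Seq[String],
      data: Seq[String]): Seq[String] =
    if (shouldMergeSchemas) {
      // Summaries cannot be trusted for merging, so every file is inspected.
      commonMetadata ++ metadata ++ data
    } else {
      // Cheapest usable source wins: _common_metadata, then _metadata, then any part-file.
      commonMetadata.headOption
        .orElse(metadata.headOption)
        .orElse(data.headOption)
        .toSeq
    }

  def main(args: Array[String]): Unit = {
    val data = Seq("part-r-00000.parquet", "part-r-00001.parquet")
    println(filesToTouch(shouldMergeSchemas = false, Nil, Seq("_metadata"), data))      // List(_metadata)
    println(filesToTouch(shouldMergeSchemas = true, Nil, Seq("_metadata"), data).size)  // 3
  }
}
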
- override def buildScan(output: Seq[Attribute], predicates: Seq[Expression]): RDD[Row] = { - val job = new Job(sparkContext.hadoopConfiguration) - ParquetInputFormat.setReadSupportClass(job, classOf[RowReadSupport]) - val jobConf: Configuration = ContextUtil.getConfiguration(job) - - val selectedPartitions = prunePartitions(predicates, partitions) - val selectedFiles = if (isPartitioned) { - selectedPartitions.flatMap { p => - metadataCache.dataStatuses.filter(_.getPath.getParent.toString == p.path) - } - } else { - metadataCache.dataStatuses.toSeq - } - val selectedFooters = selectedFiles.map(metadataCache.footers) - - // FileInputFormat cannot handle empty lists. - if (selectedFiles.nonEmpty) { - // In order to encode the authority of a Path containning special characters such as /, - // we need to use the string retruned by the URI of the path to create a new Path. - val selectedPaths = selectedFiles.map(status => new Path(status.getPath.toUri.toString)) - FileInputFormat.setInputPaths(job, selectedPaths: _*) - } - - // Try to push down filters when filter push-down is enabled. - if (sqlContext.conf.parquetFilterPushDown) { - val partitionColNames = partitionColumns.map(_.name).toSet - predicates - // Don't push down predicates which reference partition columns - .filter { pred => - val referencedColNames = pred.references.map(_.name).toSet - referencedColNames.intersect(partitionColNames).isEmpty - } - // Collects all converted Parquet filter predicates. Notice that not all predicates can be - // converted (`ParquetFilters.createFilter` returns an `Option`). That's why a `flatMap` - // is used here. - .flatMap(ParquetFilters.createFilter) - .reduceOption(FilterApi.and) - .foreach(ParquetInputFormat.setFilterPredicate(jobConf, _)) - } - - if (isPartitioned) { - logInfo { - val percentRead = selectedPartitions.size.toDouble / partitions.size.toDouble * 100 - s"Reading $percentRead% of partitions" - } - } - - val requiredColumns = output.map(_.name) - val requestedSchema = StructType(requiredColumns.map(schema(_))) - - // Store both requested and original schema in `Configuration` - jobConf.set( - RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA, - convertToString(requestedSchema.toAttributes)) - jobConf.set( - RowWriteSupport.SPARK_ROW_SCHEMA, - convertToString(schema.toAttributes)) - - // Tell FilteringParquetRowInputFormat whether it's okay to cache Parquet and FS metadata - val useCache = sqlContext.getConf(SQLConf.PARQUET_CACHE_METADATA, "true").toBoolean - jobConf.set(SQLConf.PARQUET_CACHE_METADATA, useCache.toString) - - val baseRDD = - new NewHadoopRDD( - sparkContext, - classOf[FilteringParquetRowInputFormat], - classOf[Void], - classOf[Row], - jobConf) { - val cacheMetadata = useCache - - @transient - val cachedStatus = selectedFiles.map { st => - // In order to encode the authority of a Path containning special characters such as /, - // we need to use the string retruned by the URI of the path to create a new Path. - val newPath = new Path(st.getPath.toUri.toString) - - new FileStatus( - st.getLen, - st.isDir, - st.getReplication, - st.getBlockSize, - st.getModificationTime, - st.getAccessTime, - st.getPermission, - st.getOwner, - st.getGroup, - newPath) - } - - @transient - val cachedFooters = selectedFooters.map { f => - // In order to encode the authority of a Path containning special characters such as /, - // we need to use the string retruned by the URI of the path to create a new Path. 
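
Before attempting pushdown, the removed `buildScan` drops every predicate that references a partition column, since partition values never appear inside the Parquet data files. A plain-Scala sketch of that check, representing each predicate only by the column names it references:

object PartitionPredicateSketch {
  // Each predicate is represented only by the set of column names it references.
  case class Predicate(sql: String, references: Set[String])

  def pushDownCandidates(predicates: Seq[Predicate], partitionCols: Set[String]): Seq[Predicate] =
    // A predicate touching any partition column cannot be evaluated against Parquet data files.
    predicates.filter(p => p.references.intersect(partitionCols).isEmpty)

  def main(args: Array[String]): Unit = {
    val preds = Seq(
      Predicate("a = 1", Set("a")),
      Predicate("part = 1", Set("part")),
      Predicate("a = 1 or part = 1", Set("a", "part")))
    println(pushDownCandidates(preds, Set("part")).map(_.sql)) // List(a = 1)
  }
}
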
- new Footer(new Path(f.getFile.toUri.toString), f.getParquetMetadata) - } - - - // Overridden so we can inject our own cached files statuses. - override def getPartitions: Array[SparkPartition] = { - val inputFormat = if (cacheMetadata) { - new FilteringParquetRowInputFormat { - override def listStatus(jobContext: JobContext): JList[FileStatus] = cachedStatus - - override def getFooters(jobContext: JobContext): JList[Footer] = cachedFooters - } - } else { - new FilteringParquetRowInputFormat - } - - val jobContext = newJobContext(getConf, jobId) - val rawSplits = inputFormat.getSplits(jobContext) - - Array.tabulate[SparkPartition](rawSplits.size) { i => - new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) - } - } - } - - // The ordinals for partition keys in the result row, if requested. - val partitionKeyLocations = partitionColumns.fieldNames.zipWithIndex.map { - case (name, index) => index -> requiredColumns.indexOf(name) - }.toMap.filter { - case (_, index) => index >= 0 - } - - // When the data does not include the key and the key is requested then we must fill it in - // based on information from the input split. - if (!partitionKeysIncludedInDataSchema && partitionKeyLocations.nonEmpty) { - // This check is based on CatalystConverter.createRootConverter. - val primitiveRow = - requestedSchema.forall(a => ParquetTypesConverter.isPrimitiveType(a.dataType)) - - baseRDD.mapPartitionsWithInputSplit { case (split: ParquetInputSplit, iterator) => - val partValues = selectedPartitions.collectFirst { - case p if split.getPath.getParent.toString == p.path => - CatalystTypeConverters.convertToCatalyst(p.values).asInstanceOf[Row] - }.get - - val requiredPartOrdinal = partitionKeyLocations.keys.toSeq - - if (primitiveRow) { - iterator.map { pair => - // We are using CatalystPrimitiveRowConverter and it returns a SpecificMutableRow. - val row = pair._2.asInstanceOf[SpecificMutableRow] - var i = 0 - while (i < requiredPartOrdinal.size) { - // TODO Avoids boxing cost here! - val partOrdinal = requiredPartOrdinal(i) - row.update(partitionKeyLocations(partOrdinal), partValues(partOrdinal)) - i += 1 - } - row - } - } else { - // Create a mutable row since we need to fill in values from partition columns. - val mutableRow = new GenericMutableRow(requestedSchema.size) - iterator.map { pair => - // We are using CatalystGroupConverter and it returns a GenericRow. - // Since GenericRow is not mutable, we just cast it to a Row. - val row = pair._2.asInstanceOf[Row] - var i = 0 - while (i < row.size) { - // TODO Avoids boxing cost here! - mutableRow(i) = row(i) - i += 1 - } - - i = 0 - while (i < requiredPartOrdinal.size) { - // TODO Avoids boxing cost here! 
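
When requested partition columns are missing from the Parquet data, the removed scan fills them in from the directory-derived partition values, via a precomputed map from partition-column ordinal to output-row position. A simplified sketch over plain arrays instead of Catalyst rows:

object FillPartitionValuesSketch {
  def main(args: Array[String]): Unit = {
    val partitionColumns = Seq("year", "month")            // order inside the partition values
    val requiredColumns  = Seq("a", "month", "b", "year")  // order of the output row
    val partitionValues  = Array[Any](2015, 5)             // year = 2015, month = 5

    // partition-column ordinal -> index in the output row, dropping columns not requested.
    val partitionKeyLocations: Map[Int, Int] =
      partitionColumns.zipWithIndex.map { case (name, ordinal) =>
        ordinal -> requiredColumns.indexOf(name)
      }.toMap.filter { case (_, index) => index >= 0 }

    // A row read from Parquet only carries the data columns; partition slots start empty.
    val row = Array[Any]("a-value", null, "b-value", null)
    partitionKeyLocations.foreach { case (ordinal, index) => row(index) = partitionValues(ordinal) }

    println(row.mkString(", ")) // a-value, 5, b-value, 2015
  }
}
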
- val partOrdinal = requiredPartOrdinal(i) - mutableRow.update(partitionKeyLocations(partOrdinal), partValues(partOrdinal)) - i += 1 - } - mutableRow - } - } - } - } else { - baseRDD.map(_._2) - } - } - - private def prunePartitions( - predicates: Seq[Expression], - partitions: Seq[Partition]): Seq[Partition] = { - val partitionColumnNames = partitionColumns.map(_.name).toSet - val partitionPruningPredicates = predicates.filter { - _.references.map(_.name).toSet.subsetOf(partitionColumnNames) - } - - val rawPredicate = - partitionPruningPredicates.reduceOption(expressions.And).getOrElse(Literal(true)) - val boundPredicate = InterpretedPredicate.create(rawPredicate transform { - case a: AttributeReference => - val index = partitionColumns.indexWhere(a.name == _.name) - BoundReference(index, partitionColumns(index).dataType, nullable = true) - }) - - if (isPartitioned && partitionPruningPredicates.nonEmpty) { - partitions.filter(p => boundPredicate(p.values)) - } else { - partitions - } - } - - override def insert(data: DataFrame, overwrite: Boolean): Unit = { - assert(paths.size == 1, s"Can't write to multiple destinations: ${paths.mkString(",")}") - - // TODO: currently we do not check whether the "schema"s are compatible - // That means if one first creates a table and then INSERTs data with - // and incompatible schema the execution will fail. It would be nice - // to catch this early one, maybe having the planner validate the schema - // before calling execute(). - - val job = new Job(sqlContext.sparkContext.hadoopConfiguration) - val writeSupport = - if (parquetSchema.map(_.dataType).forall(ParquetTypesConverter.isPrimitiveType)) { - log.debug("Initializing MutableRowWriteSupport") - classOf[MutableRowWriteSupport] - } else { - classOf[RowWriteSupport] - } - - ParquetOutputFormat.setWriteSupportClass(job, writeSupport) - - val conf = ContextUtil.getConfiguration(job) - RowWriteSupport.setSchema(data.schema.toAttributes, conf) - - val destinationPath = new Path(paths.head) - - if (overwrite) { - val fs = destinationPath.getFileSystem(conf) - if (fs.exists(destinationPath)) { - var success: Boolean = false - try { - success = fs.delete(destinationPath, true) - } catch { - case e: IOException => - throw new IOException( - s"Unable to clear output directory ${destinationPath.toString} prior" + - s" to writing to Parquet table:\n${e.toString}") - } - if (!success) { - throw new IOException( - s"Unable to clear output directory ${destinationPath.toString} prior" + - s" to writing to Parquet table.") - } - } - } - - job.setOutputKeyClass(classOf[Void]) - job.setOutputValueClass(classOf[Row]) - FileOutputFormat.setOutputPath(job, destinationPath) - - val wrappedConf = new SerializableWritable(job.getConfiguration) - val jobTrackerId = new SimpleDateFormat("yyyyMMddHHmm").format(new Date()) - val stageId = sqlContext.sparkContext.newRddId() - - val taskIdOffset = if (overwrite) { - 1 - } else { - FileSystemHelper.findMaxTaskId( - FileOutputFormat.getOutputPath(job).toString, job.getConfiguration) + 1 - } - - def writeShard(context: TaskContext, iterator: Iterator[Row]): Unit = { - /* "reduce task" <split #> <attempt # = spark task #> */ - val attemptId = newTaskAttemptID( - jobTrackerId, stageId, isMap = false, context.partitionId(), context.attemptNumber()) - val hadoopContext = newTaskAttemptContext(wrappedConf.value, attemptId) - val format = new AppendingParquetOutputFormat(taskIdOffset) - val committer = format.getOutputCommitter(hadoopContext) - committer.setupTask(hadoopContext) - val 
writer = format.getRecordWriter(hadoopContext) - try { - while (iterator.hasNext) { - val row = iterator.next() - writer.write(null, row) - } - } finally { - writer.close(hadoopContext) - } - - SparkHadoopMapRedUtil.commitTask(committer, hadoopContext, context) - } - val jobFormat = new AppendingParquetOutputFormat(taskIdOffset) - /* apparently we need a TaskAttemptID to construct an OutputCommitter; - * however we're only going to use this local OutputCommitter for - * setupJob/commitJob, so we just use a dummy "map" task. - */ - val jobAttemptId = newTaskAttemptID(jobTrackerId, stageId, isMap = true, 0, 0) - val jobTaskContext = newTaskAttemptContext(wrappedConf.value, jobAttemptId) - val jobCommitter = jobFormat.getOutputCommitter(jobTaskContext) - - jobCommitter.setupJob(jobTaskContext) - sqlContext.sparkContext.runJob(data.queryExecution.executedPlan.execute(), writeShard _) - jobCommitter.commitJob(jobTaskContext) - - metadataCache.refresh() - } -} - -private[sql] object ParquetRelation2 extends Logging { - // Whether we should merge schemas collected from all Parquet part-files. - val MERGE_SCHEMA = "mergeSchema" - - // Default partition name to use when the partition column value is null or empty string. - val DEFAULT_PARTITION_NAME = "partition.defaultName" - - // Hive Metastore schema, used when converting Metastore Parquet tables. This option is only used - // internally. - private[sql] val METASTORE_SCHEMA = "metastoreSchema" - - private[parquet] def readSchema( - footers: Seq[Footer], sqlContext: SQLContext): Option[StructType] = { - footers.map { footer => - val metadata = footer.getParquetMetadata.getFileMetaData - val parquetSchema = metadata.getSchema - val maybeSparkSchema = metadata - .getKeyValueMetaData - .toMap - .get(RowReadSupport.SPARK_METADATA_KEY) - .flatMap { serializedSchema => - // Don't throw even if we failed to parse the serialized Spark schema. Just fallback to - // whatever is available. - Try(DataType.fromJson(serializedSchema)) - .recover { case _: Throwable => - logInfo( - s"Serialized Spark schema in Parquet key-value metadata is not in JSON format, " + - "falling back to the deprecated DataType.fromCaseClassString parser.") - DataType.fromCaseClassString(serializedSchema) - } - .recover { case cause: Throwable => - logWarning( - s"""Failed to parse serialized Spark schema in Parquet key-value metadata: - |\t$serializedSchema - """.stripMargin, - cause) - } - .map(_.asInstanceOf[StructType]) - .toOption - } - - maybeSparkSchema.getOrElse { - // Falls back to Parquet schema if Spark SQL schema is absent. - StructType.fromAttributes( - // TODO Really no need to use `Attribute` here, we only need to know the data type. - convertToAttributes( - parquetSchema, - sqlContext.conf.isParquetBinaryAsString, - sqlContext.conf.isParquetINT96AsTimestamp)) - } - }.reduceOption { (left, right) => - try left.merge(right) catch { case e: Throwable => - throw new SparkException(s"Failed to merge incompatible schemas $left and $right", e) - } - } - } - - /** - * Reconciles Hive Metastore case insensitivity issue and data type conflicts between Metastore - * schema and Parquet schema. - * - * Hive doesn't retain case information, while Parquet is case sensitive. On the other hand, the - * schema read from Parquet files may be incomplete (e.g. older versions of Parquet doesn't - * distinguish binary and string). This method generates a correct schema by merging Metastore - * schema data types and Parquet schema field names. 
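
Both the removed `readSchema` here and its replacement tolerate malformed serialized schemas: they try the JSON parser first, fall back to the deprecated case-class-string parser, and finally log and give up rather than failing the read. A sketch of that `Try`-based fallback with placeholder parsers (not Spark's `DataType.fromJson`):

import scala.util.Try

object TolerantParseSketch {
  // Stand-ins for DataType.fromJson and the deprecated DataType.fromCaseClassString.
  def parseJson(s: String): List[String] =
    if (s.startsWith("{")) List("parsed-from-json") else sys.error("not JSON")
  def parseLegacy(s: String): List[String] =
    if (s.startsWith("StructType(")) List("parsed-from-legacy") else sys.error("not legacy either")

  def parseSchema(serialized: String): Option[List[String]] = {
    val attempt = Try(parseJson(serialized)).recover { case _: Throwable =>
      println("not JSON, falling back to the deprecated parser")
      parseLegacy(serialized)
    }
    attempt.failed.toOption.foreach { cause =>
      // Last resort: log and move on; the caller falls back to the Parquet file schema.
      println(s"failed to parse serialized schema: ${cause.getMessage}")
    }
    attempt.toOption
  }

  def main(args: Array[String]): Unit = {
    println(parseSchema("""{"type":"struct"}"""))  // Some(List(parsed-from-json))
    println(parseSchema("StructType(List())"))     // Some(List(parsed-from-legacy))
    println(parseSchema("garbage"))                // None
  }
}
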
- */ - private[parquet] def mergeMetastoreParquetSchema( - metastoreSchema: StructType, - parquetSchema: StructType): StructType = { - def schemaConflictMessage: String = - s"""Converting Hive Metastore Parquet, but detected conflicting schemas. Metastore schema: - |${metastoreSchema.prettyJson} - | - |Parquet schema: - |${parquetSchema.prettyJson} - """.stripMargin - - val mergedParquetSchema = mergeMissingNullableFields(metastoreSchema, parquetSchema) - - assert(metastoreSchema.size <= mergedParquetSchema.size, schemaConflictMessage) - - val ordinalMap = metastoreSchema.zipWithIndex.map { - case (field, index) => field.name.toLowerCase -> index - }.toMap - val reorderedParquetSchema = mergedParquetSchema.sortBy(f => - ordinalMap.getOrElse(f.name.toLowerCase, metastoreSchema.size + 1)) - - StructType(metastoreSchema.zip(reorderedParquetSchema).map { - // Uses Parquet field names but retains Metastore data types. - case (mSchema, pSchema) if mSchema.name.toLowerCase == pSchema.name.toLowerCase => - mSchema.copy(name = pSchema.name) - case _ => - throw new SparkException(schemaConflictMessage) - }) - } - - /** - * Returns the original schema from the Parquet file with any missing nullable fields from the - * Hive Metastore schema merged in. - * - * When constructing a DataFrame from a collection of structured data, the resulting object has - * a schema corresponding to the union of the fields present in each element of the collection. - * Spark SQL simply assigns a null value to any field that isn't present for a particular row. - * In some cases, it is possible that a given table partition stored as a Parquet file doesn't - * contain a particular nullable field in its schema despite that field being present in the - * table schema obtained from the Hive Metastore. This method returns a schema representing the - * Parquet file schema along with any additional nullable fields from the Metastore schema - * merged in. 
- */ - private[parquet] def mergeMissingNullableFields( - metastoreSchema: StructType, - parquetSchema: StructType): StructType = { - val fieldMap = metastoreSchema.map(f => f.name.toLowerCase -> f).toMap - val missingFields = metastoreSchema - .map(_.name.toLowerCase) - .diff(parquetSchema.map(_.name.toLowerCase)) - .map(fieldMap(_)) - .filter(_.nullable) - StructType(parquetSchema ++ missingFields) - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index a294297677d1a..7879328bbaaab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -293,9 +293,18 @@ private[sql] abstract class BaseWriterContainer( } private def newOutputCommitter(context: TaskAttemptContext): OutputCommitter = { - outputFormatClass.newInstance().getOutputCommitter(context) + val committerClass = context.getConfiguration.getClass( + "mapred.output.committer.class", null, classOf[OutputCommitter]) + + Option(committerClass).map { clazz => + val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext]) + ctor.newInstance(new Path(outputPath), context) + }.getOrElse { + outputFormatClass.newInstance().getOutputCommitter(context) + } } + private def setupIDs(jobId: Int, splitId: Int, attemptId: Int): Unit = { this.jobId = SparkHadoopWriter.createJobID(new Date, jobId) this.taskId = new TaskID(this.jobId, true, splitId) @@ -345,6 +354,7 @@ private[sql] class DefaultWriterContainer( override protected def initWriters(): Unit = { writer = outputWriterClass.newInstance() + taskAttemptContext.getConfiguration.set("spark.sql.sources.output.path", outputPath) writer.init(getWorkPath, dataSchema, taskAttemptContext) } @@ -384,11 +394,14 @@ private[sql] class DynamicPartitionWriterContainer( DynamicPartitionWriterContainer.escapePathName(string) } s"/$col=$valueString" - }.mkString + }.mkString.stripPrefix(Path.SEPARATOR) outputWriters.getOrElseUpdate(partitionPath, { - val path = new Path(getWorkPath, partitionPath.stripPrefix(Path.SEPARATOR)) + val path = new Path(getWorkPath, partitionPath) val writer = outputWriterClass.newInstance() + taskAttemptContext.getConfiguration.set( + "spark.sql.sources.output.path", + new Path(outputPath, partitionPath).toString) writer.init(path.toString, dataSchema, taskAttemptContext) writer }) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala index 10d0ede4dc0dc..3bbc5b05868af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala @@ -63,7 +63,7 @@ class ParquetFilterSuiteBase extends QueryTest with ParquetTest { }.flatten.reduceOption(_ && _) val forParquetDataSource = query.queryExecution.optimizedPlan.collect { - case PhysicalOperation(_, filters, LogicalRelation(_: ParquetRelation2)) => filters + case PhysicalOperation(_, filters, LogicalRelation(_: FSBasedParquetRelation)) => filters }.flatten.reduceOption(_ && _) forParquetTableScan.orElse(forParquetDataSource) @@ -350,7 +350,7 @@ class ParquetDataSourceOffFilterSuite extends ParquetFilterSuiteBase with Before override protected def afterAll(): Unit = { sqlContext.setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, originalConf.toString) } - + test("SPARK-6742: don't push down 
predicates which reference partition columns") { import sqlContext.implicits._ @@ -365,7 +365,7 @@ class ParquetDataSourceOffFilterSuite extends ParquetFilterSuiteBase with Before path, Some(sqlContext.sparkContext.hadoopConfiguration), sqlContext, Seq(AttributeReference("part", IntegerType, false)()) )) - + checkAnswer( df.filter("a = 1 or part = 1"), (1 to 3).map(i => Row(1, i, i.toString))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala index b504842053690..7c371dbc7d3c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala @@ -119,7 +119,7 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { } // Decimals with precision above 18 are not yet supported - intercept[RuntimeException] { + intercept[Throwable] { withTempPath { dir => makeDecimalRDD(DecimalType(19, 10)).saveAsParquetFile(dir.getCanonicalPath) parquetFile(dir.getCanonicalPath).collect() @@ -127,7 +127,7 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { } // Unlimited-length decimals are not yet supported - intercept[RuntimeException] { + intercept[Throwable] { withTempPath { dir => makeDecimalRDD(DecimalType.Unlimited).saveAsParquetFile(dir.getCanonicalPath) parquetFile(dir.getCanonicalPath).collect() @@ -419,7 +419,7 @@ class ParquetDataSourceOnIOSuite extends ParquetIOSuiteBase with BeforeAndAfterA test("SPARK-6330 regression test") { // In 1.3.0, save to fs other than file: without configuring core-site.xml would get: // IllegalArgumentException: Wrong FS: hdfs://..., expected: file:/// - intercept[java.io.FileNotFoundException] { + intercept[Throwable] { sqlContext.parquetFile("file:///nonexistent") } val errorMessage = intercept[Throwable] { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala index bea568ed40049..138e19766dc88 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala @@ -39,7 +39,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { import sqlContext._ import sqlContext.implicits._ - val defaultPartitionName = "__NULL__" + val defaultPartitionName = "__HIVE_DEFAULT_PARTITION__" test("column type inference") { def check(raw: String, literal: Literal): Unit = { @@ -252,9 +252,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { val parquetRelation = load( "org.apache.spark.sql.parquet", - Map( - "path" -> base.getCanonicalPath, - ParquetRelation2.DEFAULT_PARTITION_NAME -> defaultPartitionName)) + Map("path" -> base.getCanonicalPath)) parquetRelation.registerTempTable("t") @@ -297,9 +295,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { val parquetRelation = load( "org.apache.spark.sql.parquet", - Map( - "path" -> base.getCanonicalPath, - ParquetRelation2.DEFAULT_PARTITION_NAME -> defaultPartitionName)) + Map("path" -> base.getCanonicalPath)) parquetRelation.registerTempTable("t") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index c964b6d984557..fc90e3edce7fe 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -204,7 +204,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { StructField("lowerCase", StringType), StructField("UPPERCase", DoubleType, nullable = false)))) { - ParquetRelation2.mergeMetastoreParquetSchema( + FSBasedParquetRelation.mergeMetastoreParquetSchema( StructType(Seq( StructField("lowercase", StringType), StructField("uppercase", DoubleType, nullable = false))), @@ -219,7 +219,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { StructType(Seq( StructField("UPPERCase", DoubleType, nullable = false)))) { - ParquetRelation2.mergeMetastoreParquetSchema( + FSBasedParquetRelation.mergeMetastoreParquetSchema( StructType(Seq( StructField("uppercase", DoubleType, nullable = false))), @@ -230,7 +230,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { // Metastore schema contains additional non-nullable fields. assert(intercept[Throwable] { - ParquetRelation2.mergeMetastoreParquetSchema( + FSBasedParquetRelation.mergeMetastoreParquetSchema( StructType(Seq( StructField("uppercase", DoubleType, nullable = false), StructField("lowerCase", BinaryType, nullable = false))), @@ -241,7 +241,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { // Conflicting non-nullable field names intercept[Throwable] { - ParquetRelation2.mergeMetastoreParquetSchema( + FSBasedParquetRelation.mergeMetastoreParquetSchema( StructType(Seq(StructField("lower", StringType, nullable = false))), StructType(Seq(StructField("lowerCase", BinaryType)))) } @@ -255,7 +255,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { StructField("firstField", StringType, nullable = true), StructField("secondField", StringType, nullable = true), StructField("thirdfield", StringType, nullable = true)))) { - ParquetRelation2.mergeMetastoreParquetSchema( + FSBasedParquetRelation.mergeMetastoreParquetSchema( StructType(Seq( StructField("firstfield", StringType, nullable = true), StructField("secondfield", StringType, nullable = true), @@ -268,7 +268,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { // Merge should fail if the Metastore contains any additional fields that are not // nullable. 
assert(intercept[Throwable] { - ParquetRelation2.mergeMetastoreParquetSchema( + FSBasedParquetRelation.mergeMetastoreParquetSchema( StructType(Seq( StructField("firstfield", StringType, nullable = true), StructField("secondfield", StringType, nullable = true), diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index d754c8e3a8aa1..b0e82c8d033b2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -33,10 +33,10 @@ import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.hive.client._ -import org.apache.spark.sql.parquet.ParquetRelation2 +import org.apache.spark.sql.parquet.FSBasedParquetRelation import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, Partition => ParquetPartition, PartitionSpec, ResolvedDataSource} import org.apache.spark.sql.types._ -import org.apache.spark.sql.{AnalysisException, SQLContext, SaveMode} +import org.apache.spark.sql.{AnalysisException, SQLContext, SaveMode, sources} import org.apache.spark.util.Utils /* Implicit conversions */ @@ -226,8 +226,8 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive // serialize the Metastore schema to JSON and pass it as a data source option because of the // evil case insensitivity issue, which is reconciled within `ParquetRelation2`. val parquetOptions = Map( - ParquetRelation2.METASTORE_SCHEMA -> metastoreSchema.json, - ParquetRelation2.MERGE_SCHEMA -> mergeSchema.toString) + FSBasedParquetRelation.METASTORE_SCHEMA -> metastoreSchema.json, + FSBasedParquetRelation.MERGE_SCHEMA -> mergeSchema.toString) val tableIdentifier = QualifiedTableName(metastoreRelation.databaseName, metastoreRelation.tableName) @@ -238,13 +238,15 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive partitionSpecInMetastore: Option[PartitionSpec]): Option[LogicalRelation] = { cachedDataSourceTables.getIfPresent(tableIdentifier) match { case null => None // Cache miss - case logical@LogicalRelation(parquetRelation: ParquetRelation2) => + case logical@LogicalRelation(parquetRelation: FSBasedParquetRelation) => // If we have the same paths, same schema, and same partition spec, // we will use the cached Parquet Relation. val useCached = parquetRelation.paths.toSet == pathsInMetastore.toSet && logical.schema.sameType(metastoreSchema) && - parquetRelation.maybePartitionSpec == partitionSpecInMetastore + parquetRelation.partitionSpec == partitionSpecInMetastore.getOrElse { + PartitionSpec(StructType(Nil), Array.empty[sources.Partition]) + } if (useCached) { Some(logical) @@ -256,7 +258,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive case other => logWarning( s"${metastoreRelation.databaseName}.${metastoreRelation.tableName} should be stored " + - s"as Parquet. However, we are getting a ${other} from the metastore cache. " + + s"as Parquet. However, we are getting a $other from the metastore cache. 
" + s"This cached entry will be invalidated.") cachedDataSourceTables.invalidate(tableIdentifier) None @@ -278,8 +280,9 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val cached = getCached(tableIdentifier, paths, metastoreSchema, Some(partitionSpec)) val parquetRelation = cached.getOrElse { - val created = - LogicalRelation(ParquetRelation2(paths, parquetOptions, None, Some(partitionSpec))(hive)) + val created = LogicalRelation( + new FSBasedParquetRelation( + paths.toArray, None, Some(partitionSpec), parquetOptions)(hive)) cachedDataSourceTables.put(tableIdentifier, created) created } @@ -290,8 +293,8 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val cached = getCached(tableIdentifier, paths, metastoreSchema, None) val parquetRelation = cached.getOrElse { - val created = - LogicalRelation(ParquetRelation2(paths, parquetOptions)(hive)) + val created = LogicalRelation( + new FSBasedParquetRelation(paths.toArray, None, None, parquetOptions)(hive)) cachedDataSourceTables.put(tableIdentifier, created) created } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 47c60f651d14c..da5d203d9d343 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -21,21 +21,18 @@ import java.io.File import scala.collection.mutable.ArrayBuffer -import org.scalatest.BeforeAndAfterEach - import org.apache.hadoop.fs.Path -import org.apache.hadoop.hive.metastore.TableType -import org.apache.hadoop.hive.ql.metadata.Table import org.apache.hadoop.mapred.InvalidInputException +import org.scalatest.BeforeAndAfterEach import org.apache.spark.sql._ -import org.apache.spark.util.Utils -import org.apache.spark.sql.types._ import org.apache.spark.sql.hive.client.{HiveTable, ManagedTable} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ -import org.apache.spark.sql.parquet.ParquetRelation2 +import org.apache.spark.sql.parquet.FSBasedParquetRelation import org.apache.spark.sql.sources.LogicalRelation +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * Tests for persisting tables created though the data sources API into the metastore. @@ -582,11 +579,11 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { ) table("test_parquet_ctas").queryExecution.optimizedPlan match { - case LogicalRelation(p: ParquetRelation2) => // OK + case LogicalRelation(p: FSBasedParquetRelation) => // OK case _ => fail( "test_parquet_ctas should be converted to " + - s"${classOf[ParquetRelation2].getCanonicalName}") + s"${classOf[FSBasedParquetRelation].getCanonicalName}") } // Clenup and reset confs. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index a5744ccc68a47..1d6393a3fec85 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -19,16 +19,14 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries import org.apache.spark.sql.catalyst.errors.DialectException -import org.apache.spark.sql.DefaultParserDialect -import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SQLConf} -import org.apache.spark.sql.hive.MetastoreRelation import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ -import org.apache.spark.sql.hive.{HiveQLDialect, HiveShim} -import org.apache.spark.sql.parquet.ParquetRelation2 +import org.apache.spark.sql.hive.{HiveQLDialect, HiveShim, MetastoreRelation} +import org.apache.spark.sql.parquet.FSBasedParquetRelation import org.apache.spark.sql.sources.LogicalRelation import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, DefaultParserDialect, QueryTest, Row, SQLConf} case class Nested1(f1: Nested2) case class Nested2(f2: Nested3) @@ -176,17 +174,17 @@ class SQLQuerySuite extends QueryTest { def checkRelation(tableName: String, isDataSourceParquet: Boolean): Unit = { val relation = EliminateSubQueries(catalog.lookupRelation(Seq(tableName))) relation match { - case LogicalRelation(r: ParquetRelation2) => + case LogicalRelation(r: FSBasedParquetRelation) => if (!isDataSourceParquet) { fail( s"${classOf[MetastoreRelation].getCanonicalName} is expected, but found " + - s"${ParquetRelation2.getClass.getCanonicalName}.") + s"${FSBasedParquetRelation.getClass.getCanonicalName}.") } case r: MetastoreRelation => if (isDataSourceParquet) { fail( - s"${ParquetRelation2.getClass.getCanonicalName} is expected, but found " + + s"${FSBasedParquetRelation.getClass.getCanonicalName} is expected, but found " + s"${classOf[MetastoreRelation].getCanonicalName}.") } } @@ -596,7 +594,7 @@ class SQLQuerySuite extends QueryTest { sql(s"DROP TABLE $tableName") } } - + test("SPARK-5203 union with different decimal precision") { Seq.empty[(Decimal, Decimal)] .toDF("d1", "d2") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index bf1121ddf0273..41bcbe84b0ef2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -21,16 +21,15 @@ import java.io.File import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.{QueryTest, SQLConf} import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.execution.{ExecutedCommand, PhysicalRDD} import org.apache.spark.sql.hive.execution.HiveTableScan import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ -import org.apache.spark.sql.sources.{InsertIntoDataSource, LogicalRelation} -import org.apache.spark.sql.parquet.{ParquetRelation2, ParquetTableScan} -import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.parquet.{FSBasedParquetRelation, ParquetTableScan} +import org.apache.spark.sql.sources.{InsertIntoDataSource, 
InsertIntoFSBasedRelation, LogicalRelation} import org.apache.spark.sql.types._ +import org.apache.spark.sql.{QueryTest, SQLConf, SaveMode} import org.apache.spark.util.Utils // The data where the partitioning key exists only in the directory structure. @@ -292,10 +291,10 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { ) table("test_parquet_ctas").queryExecution.optimizedPlan match { - case LogicalRelation(p: ParquetRelation2) => // OK - case _ => - fail( - s"test_parquet_ctas should be converted to ${classOf[ParquetRelation2].getCanonicalName}") + case LogicalRelation(_: FSBasedParquetRelation) => // OK + case _ => fail( + "test_parquet_ctas should be converted to " + + s"${classOf[FSBasedParquetRelation].getCanonicalName}") } sql("DROP TABLE IF EXISTS test_parquet_ctas") @@ -316,12 +315,10 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { val df = sql("INSERT INTO TABLE test_insert_parquet SELECT a FROM jt") df.queryExecution.executedPlan match { - case ExecutedCommand( - InsertIntoDataSource( - LogicalRelation(r: ParquetRelation2), query, overwrite)) => // OK + case ExecutedCommand(InsertIntoFSBasedRelation(_: FSBasedParquetRelation, _, _, _)) => // OK case o => fail("test_insert_parquet should be converted to a " + - s"${classOf[ParquetRelation2].getCanonicalName} and " + - s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan." + + s"${classOf[FSBasedParquetRelation].getCanonicalName} and " + + s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan. " + s"However, found a ${o.toString} ") } @@ -348,11 +345,9 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { val df = sql("INSERT INTO TABLE test_insert_parquet SELECT a FROM jt_array") df.queryExecution.executedPlan match { - case ExecutedCommand( - InsertIntoDataSource( - LogicalRelation(r: ParquetRelation2), query, overwrite)) => // OK + case ExecutedCommand(InsertIntoFSBasedRelation(r: FSBasedParquetRelation, _, _, _)) => // OK case o => fail("test_insert_parquet should be converted to a " + - s"${classOf[ParquetRelation2].getCanonicalName} and " + + s"${classOf[FSBasedParquetRelation].getCanonicalName} and " + s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan." + s"However, found a ${o.toString} ") } @@ -383,7 +378,7 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { assertResult(2) { analyzed.collect { - case r @ LogicalRelation(_: ParquetRelation2) => r + case r @ LogicalRelation(_: FSBasedParquetRelation) => r }.size } @@ -395,7 +390,7 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { // Converted test_parquet should be cached. catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) match { case null => fail("Converted test_parquet should be cached in the cache.") - case logical @ LogicalRelation(parquetRelation: ParquetRelation2) => // OK + case logical @ LogicalRelation(parquetRelation: FSBasedParquetRelation) => // OK case other => fail( "The cached test_parquet should be a Parquet Relation. 
" + @@ -693,7 +688,7 @@ class ParquetDataSourceOnSourceSuite extends ParquetSourceSuiteBase { val df = Seq(1,2,3).map(i => (i, i.toString)).toDF("int", "str") val df2 = df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("y.str").max("y.int") - intercept[RuntimeException](df2.saveAsParquetFile(filePath)) + intercept[Throwable](df2.saveAsParquetFile(filePath)) val df3 = df2.toDF("str", "max_int") df3.saveAsParquetFile(filePath2) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/fsBasedRelationSuites.scala similarity index 83% rename from sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala rename to sql/hive/src/test/scala/org/apache/spark/sql/sources/fsBasedRelationSuites.scala index e8b48a0db1c79..394833f22907d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/FSBasedRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/fsBasedRelationSuites.scala @@ -28,12 +28,14 @@ import org.apache.spark.sql.types._ // TODO Don't extend ParquetTest // This test suite extends ParquetTest for some convenient utility methods. These methods should be // moved to some more general places, maybe QueryTest. -class FSBasedRelationSuite extends QueryTest with ParquetTest { +class FSBasedRelationTest extends QueryTest with ParquetTest { override val sqlContext: SQLContext = TestHive import sqlContext._ import sqlContext.implicits._ + val dataSourceName = classOf[SimpleTextSource].getCanonicalName + val dataSchema = StructType( Seq( @@ -92,17 +94,17 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { withTempPath { file => testDF.save( path = file.getCanonicalPath, - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite) testDF.save( path = file.getCanonicalPath, - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite) checkAnswer( load( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, options = Map( "path" -> file.getCanonicalPath, "dataSchema" -> dataSchema.json)), @@ -114,17 +116,17 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { withTempPath { file => testDF.save( path = file.getCanonicalPath, - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite) testDF.save( path = file.getCanonicalPath, - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Append) checkAnswer( load( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, options = Map( "path" -> file.getCanonicalPath, "dataSchema" -> dataSchema.json)).orderBy("a"), @@ -137,7 +139,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { intercept[RuntimeException] { testDF.save( path = file.getCanonicalPath, - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.ErrorIfExists) } } @@ -147,7 +149,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { withTempDir { file => testDF.save( path = file.getCanonicalPath, - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Ignore) val path = new Path(file.getCanonicalPath) @@ -159,62 +161,37 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("save()/load() - partitioned table - simple queries") { withTempPath { file => 
partitionedTestDF.save( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.ErrorIfExists, options = Map("path" -> file.getCanonicalPath), partitionColumns = Seq("p1", "p2")) checkQueries( load( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, options = Map( "path" -> file.getCanonicalPath, "dataSchema" -> dataSchema.json))) } } - test("save()/load() - partitioned table - simple queries - partition columns in data") { - withTempDir { file => - val basePath = new Path(file.getCanonicalPath) - val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf) - val qualifiedBasePath = fs.makeQualified(basePath) - - for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) { - val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2") - sparkContext - .parallelize(for (i <- 1 to 3) yield s"$i,val_$i,$p1") - .saveAsTextFile(partitionDir.toString) - } - - val dataSchemaWithPartition = - StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) - - checkQueries( - load( - source = classOf[SimpleTextSource].getCanonicalName, - options = Map( - "path" -> file.getCanonicalPath, - "dataSchema" -> dataSchemaWithPartition.json))) - } - } - test("save()/load() - partitioned table - Overwrite") { withTempPath { file => partitionedTestDF.save( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("path" -> file.getCanonicalPath), partitionColumns = Seq("p1", "p2")) partitionedTestDF.save( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("path" -> file.getCanonicalPath), partitionColumns = Seq("p1", "p2")) checkAnswer( load( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, options = Map( "path" -> file.getCanonicalPath, "dataSchema" -> dataSchema.json)), @@ -225,20 +202,20 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("save()/load() - partitioned table - Append") { withTempPath { file => partitionedTestDF.save( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("path" -> file.getCanonicalPath), partitionColumns = Seq("p1", "p2")) partitionedTestDF.save( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Append, options = Map("path" -> file.getCanonicalPath), partitionColumns = Seq("p1", "p2")) checkAnswer( load( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, options = Map( "path" -> file.getCanonicalPath, "dataSchema" -> dataSchema.json)), @@ -249,20 +226,20 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("save()/load() - partitioned table - Append - new partition values") { withTempPath { file => partitionedTestDF1.save( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("path" -> file.getCanonicalPath), partitionColumns = Seq("p1", "p2")) partitionedTestDF2.save( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Append, options = Map("path" -> file.getCanonicalPath), partitionColumns = Seq("p1", "p2")) checkAnswer( load( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, options = Map( "path" -> file.getCanonicalPath, "dataSchema" -> dataSchema.json)), @@ -274,7 +251,7 @@ class FSBasedRelationSuite 
extends QueryTest with ParquetTest { withTempDir { file => intercept[RuntimeException] { partitionedTestDF.save( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.ErrorIfExists, options = Map("path" -> file.getCanonicalPath), partitionColumns = Seq("p1", "p2")) @@ -286,7 +263,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { withTempDir { file => partitionedTestDF.save( path = file.getCanonicalPath, - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Ignore) val path = new Path(file.getCanonicalPath) @@ -302,7 +279,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("saveAsTable()/load() - non-partitioned table - Overwrite") { testDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, Map("dataSchema" -> dataSchema.json)) @@ -314,12 +291,12 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("saveAsTable()/load() - non-partitioned table - Append") { testDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite) testDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Append) withTable("t") { @@ -334,7 +311,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { intercept[AnalysisException] { testDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.ErrorIfExists) } } @@ -346,7 +323,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { withTempTable("t") { testDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Ignore) assert(table("t").collect().isEmpty) @@ -356,7 +333,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("saveAsTable()/load() - partitioned table - simple queries") { partitionedTestDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, Map("dataSchema" -> dataSchema.json)) @@ -368,14 +345,14 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("saveAsTable()/load() - partitioned table - Overwrite") { partitionedTestDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p1", "p2")) partitionedTestDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p1", "p2")) @@ -388,14 +365,14 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("saveAsTable()/load() - partitioned table - Append") { partitionedTestDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p1", "p2")) partitionedTestDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Append, options = Map("dataSchema" -> dataSchema.json), partitionColumns = 
Seq("p1", "p2")) @@ -408,14 +385,14 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("saveAsTable()/load() - partitioned table - Append - new partition values") { partitionedTestDF1.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p1", "p2")) partitionedTestDF2.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Append, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p1", "p2")) @@ -428,7 +405,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("saveAsTable()/load() - partitioned table - Append - mismatched partition columns") { partitionedTestDF1.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p1", "p2")) @@ -437,7 +414,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { intercept[Throwable] { partitionedTestDF2.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Append, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p1")) @@ -447,7 +424,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { intercept[Throwable] { partitionedTestDF2.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Append, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p2", "p1")) @@ -461,7 +438,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { intercept[AnalysisException] { partitionedTestDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.ErrorIfExists, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p1", "p2")) @@ -475,7 +452,7 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { withTempTable("t") { partitionedTestDF.saveAsTable( tableName = "t", - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Ignore, options = Map("dataSchema" -> dataSchema.json), partitionColumns = Seq("p1", "p2")) @@ -487,13 +464,13 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { test("Hadoop style globbing") { withTempPath { file => partitionedTestDF.save( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, mode = SaveMode.Overwrite, options = Map("path" -> file.getCanonicalPath), partitionColumns = Seq("p1", "p2")) val df = load( - source = classOf[SimpleTextSource].getCanonicalName, + source = dataSourceName, options = Map( "path" -> s"${file.getCanonicalPath}/p1=*/p2=???", "dataSchema" -> dataSchema.json)) @@ -521,3 +498,67 @@ class FSBasedRelationSuite extends QueryTest with ParquetTest { } } } + +class SimpleTextRelationSuite extends FSBasedRelationTest { + override val dataSourceName: String = classOf[SimpleTextSource].getCanonicalName + + import sqlContext._ + + test("save()/load() - partitioned table - simple queries - partition columns in data") { + withTempDir { file => + val basePath = new Path(file.getCanonicalPath) + val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf) + val qualifiedBasePath = 
fs.makeQualified(basePath) + + for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) { + val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2") + sparkContext + .parallelize(for (i <- 1 to 3) yield s"$i,val_$i,$p1") + .saveAsTextFile(partitionDir.toString) + } + + val dataSchemaWithPartition = + StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) + + checkQueries( + load( + source = dataSourceName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchemaWithPartition.json))) + } + } +} + +class FSBasedParquetRelationSuite extends FSBasedRelationTest { + override val dataSourceName: String = classOf[parquet.DefaultSource].getCanonicalName + + import sqlContext._ + import sqlContext.implicits._ + + test("save()/load() - partitioned table - simple queries - partition columns in data") { + withTempDir { file => + val basePath = new Path(file.getCanonicalPath) + val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf) + val qualifiedBasePath = fs.makeQualified(basePath) + + for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) { + val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2") + sparkContext + .parallelize(for (i <- 1 to 3) yield (i, s"val_$i", p1)) + .toDF("a", "b", "p1") + .saveAsParquetFile(partitionDir.toString) + } + + val dataSchemaWithPartition = + StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) + + checkQueries( + load( + source = dataSourceName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchemaWithPartition.json))) + } + } +} From 213a6f30fee4a1c416ea76b678c71877fd36ef18 Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Wed, 13 May 2015 12:47:48 -0700 Subject: [PATCH 143/320] [SPARK-7551][DataFrame] support backticks for DataFrame attribute resolution Author: Wenchen Fan <cloud0fan@outlook.com> Closes #6074 from cloud-fan/7551 and squashes the following commits: e6f579e [Wenchen Fan] allow space 2b86699 [Wenchen Fan] handle blank e218d99 [Wenchen Fan] address comments 54c4209 [Wenchen Fan] fix 7551 --- .../catalyst/plans/logical/LogicalPlan.scala | 55 ++++++++++++++++++- .../org/apache/spark/sql/DataFrame.scala | 4 +- .../org/apache/spark/sql/DataFrameSuite.scala | 27 +++++++++ 3 files changed, 82 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index dbb12d56f9497..dba69659afc80 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -105,7 +105,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { } /** - * Optionally resolves the given string to a [[NamedExpression]] using the input from all child + * Optionally resolves the given strings to a [[NamedExpression]] using the input from all child * nodes of this LogicalPlan. The attribute is expressed as * as string in the following form: `[scope].AttributeName.[nested].[fields]...`. */ @@ -116,7 +116,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { resolve(nameParts, children.flatMap(_.output), resolver, throwErrors) /** - * Optionally resolves the given string to a [[NamedExpression]] based on the output of this + * Optionally resolves the given strings to a [[NamedExpression]] based on the output of this * LogicalPlan. 
The attribute is expressed as string in the following form: * `[scope].AttributeName.[nested].[fields]...`. */ @@ -126,6 +126,57 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { throwErrors: Boolean = false): Option[NamedExpression] = resolve(nameParts, output, resolver, throwErrors) + /** + * Given an attribute name, split it to name parts by dot, but + * don't split the name parts quoted by backticks, for example, + * `ab.cd`.`efg` should be split into two parts "ab.cd" and "efg". + */ + def resolveQuoted( + name: String, + resolver: Resolver): Option[NamedExpression] = { + resolve(parseAttributeName(name), resolver, true) + } + + /** + * Internal method, used to split attribute name by dot with backticks rule. + * Backticks must appear in pairs, and the quoted string must be a complete name part, + * which means `ab..c`e.f is not allowed. + * Escape character is not supported now, so we can't use backtick inside name part. + */ + private def parseAttributeName(name: String): Seq[String] = { + val e = new AnalysisException(s"syntax error in attribute name: $name") + val nameParts = scala.collection.mutable.ArrayBuffer.empty[String] + val tmp = scala.collection.mutable.ArrayBuffer.empty[Char] + var inBacktick = false + var i = 0 + while (i < name.length) { + val char = name(i) + if (inBacktick) { + if (char == '`') { + inBacktick = false + if (i + 1 < name.length && name(i + 1) != '.') throw e + } else { + tmp += char + } + } else { + if (char == '`') { + if (tmp.nonEmpty) throw e + inBacktick = true + } else if (char == '.') { + if (tmp.isEmpty) throw e + nameParts += tmp.mkString + tmp.clear() + } else { + tmp += char + } + } + i += 1 + } + if (tmp.isEmpty || inBacktick) throw e + nameParts += tmp.mkString + nameParts.toSeq + } + /** * Resolve the given `name` string against the given attribute, returning either 0 or 1 match. 
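// Illustrative summary of the splitting rules above (hypothetical attribute names):
//
//   "a.b.c"             => Seq("a", "b", "c")
//   "`ab.cd`.`efg`"     => Seq("ab.cd", "efg")      // backticked parts keep their dots
//   "`a.b`.c.`d..e`.f"  => Seq("a.b", "c", "d..e", "f")
//   "`abc.`c`"          => AnalysisException        // backtick must close a whole name part
//   "`a`.b."            => AnalysisException        // trailing dot leaves an empty part
//
// At the DataFrame level this is what makes e.g.
//   df.select(df("`a.b`.c.`d..e`.`f`"))
// resolve a column whose name parts themselves contain dots (see the DataFrameSuite test
// added later in this change).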
* diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index c820a673575ff..4fd5105c27443 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -160,7 +160,7 @@ class DataFrame private[sql]( } protected[sql] def resolve(colName: String): NamedExpression = { - queryExecution.analyzed.resolve(colName.split("\\."), sqlContext.analyzer.resolver).getOrElse { + queryExecution.analyzed.resolveQuoted(colName, sqlContext.analyzer.resolver).getOrElse { throw new AnalysisException( s"""Cannot resolve column name "$colName" among (${schema.fieldNames.mkString(", ")})""") } @@ -168,7 +168,7 @@ class DataFrame private[sql]( protected[sql] def numericColumns: Seq[Expression] = { schema.fields.filter(_.dataType.isInstanceOf[NumericType]).map { n => - queryExecution.analyzed.resolve(n.name.split("\\."), sqlContext.analyzer.resolver).get + queryExecution.analyzed.resolveQuoted(n.name, sqlContext.analyzer.resolver).get } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 52aa1f6558f80..1d5f6b3aad6fd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -459,6 +459,33 @@ class DataFrameSuite extends QueryTest { assert(complexData.filter(complexData("m")(complexData("s")("value")) === 1).count() == 1) } + test("SPARK-7551: support backticks for DataFrame attribute resolution") { + val df = TestSQLContext.jsonRDD(TestSQLContext.sparkContext.makeRDD( + """{"a.b": {"c": {"d..e": {"f": 1}}}}""" :: Nil)) + checkAnswer( + df.select(df("`a.b`.c.`d..e`.`f`")), + Row(1) + ) + + val df2 = TestSQLContext.jsonRDD(TestSQLContext.sparkContext.makeRDD( + """{"a b": {"c": {"d e": {"f": 1}}}}""" :: Nil)) + checkAnswer( + df2.select(df2("`a b`.c.d e.f")), + Row(1) + ) + + def checkError(testFun: => Unit): Unit = { + val e = intercept[org.apache.spark.sql.AnalysisException] { + testFun + } + assert(e.getMessage.contains("syntax error in attribute name:")) + } + checkError(df("`abc.`c`")) + checkError(df("`abc`..d")) + checkError(df("`a`.b.")) + checkError(df("`a.b`.c.`d")) + } + test("SPARK-7324 dropDuplicates") { val testData = TestSQLContext.sparkContext.parallelize( (2, 1, 2) :: (1, 1, 1) :: From e676fc0c6326f3ddeced87214cc88534ea646473 Mon Sep 17 00:00:00 2001 From: Tim Ellison <t.p.ellison@gmail.com> Date: Wed, 13 May 2015 21:00:12 +0100 Subject: [PATCH 144/320] [MINOR] Avoid passing the PermGenSize option to IBM JVMs. IBM's Java VM doesn't have the concept of a permgen, so this option shouldn't be passed when the vendor property shows it is an IBM JDK. Author: Tim Ellison <t.p.ellison@gmail.com> Author: Tim Ellison <tellison@users.noreply.github.com> Closes #6055 from tellison/MaxPermSize and squashes the following commits: 3a0fb66 [Tim Ellison] Convert tabs back to spaces 6ad4266 [Tim Ellison] Remove unnecessary else clauses to reduce nesting. d27174b [Tim Ellison] Merge branch 'master' of https://github.com/apache/spark into MaxPermSize 42a8c3f [Tim Ellison] [MINOR] Avoid passing the PermGenSize option to IBM JVMs. 
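A rough sketch of the vendor check this change introduces (illustrative only; the real logic lives in the Java launcher code below, and the object name and the "128m" default here are made up for the example):

    object PermGenCheck {
      // Emit -XX:MaxPermSize only for non-IBM JVMs running Java 7 or earlier.
      def maxPermSizeArg(size: String = "128m"): Option[String] = {
        val vendor = System.getProperty("java.vendor")
        val version = System.getProperty("java.version").split("\\.")
        val java8OrLater = version(0).toInt > 1 || version(1).toInt > 7
        if (vendor.contains("IBM") || java8OrLater) None
        else Some(s"-XX:MaxPermSize=$size")
      }
    }
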
--- .../launcher/AbstractCommandBuilder.java | 5 ++++- .../spark/launcher/CommandBuilderUtils.java | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index b8f02b961113d..33fd813f7a86c 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -121,7 +121,10 @@ List<String> buildJavaCommand(String extraClassPath) throws IOException { * set it. */ void addPermGenSizeOpt(List<String> cmd) { - // Don't set MaxPermSize for Java 8 and later. + // Don't set MaxPermSize for IBM Java, or Oracle Java 8 and later. + if (getJavaVendor() == JavaVendor.IBM) { + return; + } String[] version = System.getProperty("java.version").split("\\."); if (Integer.parseInt(version[0]) > 1 || Integer.parseInt(version[1]) > 7) { return; diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java index 261402856ac5e..2665a700fe1f5 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java +++ b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java @@ -32,6 +32,11 @@ class CommandBuilderUtils { static final String ENV_SPARK_HOME = "SPARK_HOME"; static final String ENV_SPARK_ASSEMBLY = "_SPARK_ASSEMBLY"; + /** The set of known JVM vendors. */ + static enum JavaVendor { + Oracle, IBM, OpenJDK, Unknown + }; + /** Returns whether the given string is null or empty. */ static boolean isEmpty(String s) { return s == null || s.isEmpty(); @@ -108,6 +113,21 @@ static boolean isWindows() { return os.startsWith("Windows"); } + /** Returns an enum value indicating whose JVM is being used. */ + static JavaVendor getJavaVendor() { + String vendorString = System.getProperty("java.vendor"); + if (vendorString.contains("Oracle")) { + return JavaVendor.Oracle; + } + if (vendorString.contains("IBM")) { + return JavaVendor.IBM; + } + if (vendorString.contains("OpenJDK")) { + return JavaVendor.OpenJDK; + } + return JavaVendor.Unknown; + } + /** * Updates the user environment, appending the given pathList to the existing value of the given * environment variable (or setting it if it hasn't yet been set). From 3cd9ad2406c59cd0ede6c9c8428a4ce4b805f8fa Mon Sep 17 00:00:00 2001 From: Tim Ellison <t.p.ellison@gmail.com> Date: Wed, 13 May 2015 21:01:42 +0100 Subject: [PATCH 145/320] =?UTF-8?q?[MINOR]=20Enhance=20SizeEstimator=20to?= =?UTF-8?q?=20detect=20IBM=20compressed=20refs=20and=20s390=20=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …arch. - zSeries 64-bit Java reports its architecture as s390x, so enhance the 64-bit check to accommodate that value. - SizeEstimator can detect whether IBM Java is using compressed object pointers using info in the "java.vm.info" property, so will do a better job than failing on the HotSpot MBean and guessing. Author: Tim Ellison <t.p.ellison@gmail.com> Closes #6085 from tellison/SizeEstimator and squashes the following commits: 1b6ff6a [Tim Ellison] Merge branch 'master' of https://github.com/apache/spark into SizeEstimator 0968989 [Tim Ellison] [MINOR] Enhance SizeEstimator to detect IBM compressed refs and s390 arch. 
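In rough terms the detection added below comes down to two system-property checks; the sketch here is illustrative only (the object name is made up, and SizeEstimator itself still falls back to the HotSpot diagnostic MBean for non-IBM JVMs):

    object JvmLayoutProbe {
      // 64-bit detection: zSeries JVMs report os.arch = "s390x", which contains no "64".
      def is64Bit: Boolean = {
        val arch = System.getProperty("os.arch")
        arch.contains("64") || arch.contains("s390x")
      }

      // IBM JVMs expose compressed-reference info in java.vm.info; other vendors need the
      // HotSpotDiagnostic MBean query that SizeEstimator already performs.
      def ibmCompressedRefs: Option[Boolean] =
        if (System.getProperty("java.vendor").contains("IBM")) {
          Some(System.getProperty("java.vm.info").contains("Compressed Ref"))
        } else {
          None
        }
    }
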
--- .../scala/org/apache/spark/util/SizeEstimator.scala | 8 +++++++- .../org/apache/spark/util/SizeEstimatorSuite.scala | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index d91c3294ddb8b..968a72d5adae9 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -75,7 +75,8 @@ private[spark] object SizeEstimator extends Logging { // Sets object size, pointer size based on architecture and CompressedOops settings // from the JVM. private def initialize() { - is64bit = System.getProperty("os.arch").contains("64") + val arch = System.getProperty("os.arch") + is64bit = arch.contains("64") || arch.contains("s390x") isCompressedOops = getIsCompressedOops objectSize = if (!is64bit) 8 else { @@ -97,6 +98,11 @@ private[spark] object SizeEstimator extends Logging { return System.getProperty("spark.test.useCompressedOops").toBoolean } + // java.vm.info provides compressed ref info for IBM JDKs + if (System.getProperty("java.vendor").contains("IBM")) { + return System.getProperty("java.vm.info").contains("Compressed Ref") + } + try { val hotSpotMBeanName = "com.sun.management:type=HotSpotDiagnostic" val server = ManagementFactory.getPlatformMBeanServer() diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala index 133a76f28e000..04f0f3749d6b9 100644 --- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala @@ -45,6 +45,10 @@ class DummyClass6 extends DummyClass5 { val y: Boolean = true } +class DummyClass7 { + val x: DummyClass1 = new DummyClass1 +} + object DummyString { def apply(str: String) : DummyString = new DummyString(str.toArray) } @@ -197,4 +201,12 @@ class SizeEstimatorSuite assertResult(24)(SizeEstimator.estimate(new DummyClass5)) assertResult(32)(SizeEstimator.estimate(new DummyClass6)) } + + test("check 64-bit detection for s390x arch") { + System.setProperty("os.arch", "s390x") + val initialize = PrivateMethod[Unit]('initialize) + SizeEstimator invokePrivate initialize() + // Class should be 32 bytes on s390x if recognised as 64 bit platform + assertResult(32)(SizeEstimator.estimate(new DummyClass7)) + } } From 51030b8a9d4f3feb7a5d2249cc867fd6a06f0336 Mon Sep 17 00:00:00 2001 From: Tim Ellison <tellison@users.noreply.github.com> Date: Wed, 13 May 2015 21:16:32 +0100 Subject: [PATCH 146/320] [MINOR] [CORE] Accept alternative mesos unsatisfied link error in test. The IBM JVM reports an failed library load with a slightly different error message to Oracle's JVM. Update the test case to allow for either form. Author: Tim Ellison <tellison@users.noreply.github.com> Author: Tim Ellison <t.p.ellison@gmail.com> Closes #6119 from tellison/LibraryLoading and squashes the following commits: 2c5cd4e [Tim Ellison] Reduce assertion to check for the mesos library name f48c194 [Tim Ellison] Split long line b1079d7 [Tim Ellison] [MINOR] [CORE] Accept alternative mesos unsatisfied link error in test. 
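The fix is a one-line relaxation of the assertion; as a general pattern it looks roughly like this (sketch only): catch the native-load failure and assert on the library name, which both JVMs include in the message, rather than on HotSpot's exact "no mesos in java.library.path" wording.

    try {
      System.loadLibrary("mesos")
    } catch {
      case e: UnsatisfiedLinkError =>
        assert(e.getMessage.contains("mesos"))   // stable across JVM vendors
    }
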
--- .../org/apache/spark/SparkContextSchedulerCreationSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala index bbed8ddc6bafc..9343f4fff89da 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala @@ -159,7 +159,7 @@ class SparkContextSchedulerCreationSuite assert(sched.backend.getClass === expectedClass) } catch { case e: UnsatisfiedLinkError => - assert(e.getMessage.contains("no mesos in")) + assert(e.getMessage.contains("mesos")) logWarning("Mesos not available, could not test actual Mesos scheduler creation") case e: Throwable => fail(e) } From 5db18ba6e1bd8c6307c41549176c53590cf344a0 Mon Sep 17 00:00:00 2001 From: Burak Yavuz <brkyvz@gmail.com> Date: Wed, 13 May 2015 13:21:36 -0700 Subject: [PATCH 147/320] [SPARK-7593] [ML] Python Api for ml.feature.Bucketizer Added `ml.feature.Bucketizer` to PySpark. cc mengxr Author: Burak Yavuz <brkyvz@gmail.com> Closes #6124 from brkyvz/ml-bucket and squashes the following commits: 05285be [Burak Yavuz] added sphinx doc 6abb6ed [Burak Yavuz] added support for Bucketizer --- .../apache/spark/ml/feature/Bucketizer.scala | 2 +- .../org/apache/spark/ml/param/params.scala | 15 +++- python/pyspark/ml/feature.py | 77 +++++++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index b28c88aaaecbc..e52d797293cf3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -48,7 +48,7 @@ final class Bucketizer private[ml] (override val parent: Estimator[Bucketizer]) * otherwise, values outside the splits specified will be treated as errors. * @group param */ - val splits: Param[Array[Double]] = new Param[Array[Double]](this, "splits", + val splits: DoubleArrayParam = new DoubleArrayParam(this, "splits", "Split points for mapping continuous features into buckets. With n+1 splits, there are n " + "buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last " + "bucket, which also includes y. The splits should be strictly increasing. " + diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 7ebbf106ee753..5a7ec29aac6cc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -219,7 +219,7 @@ class BooleanParam(parent: Params, name: String, doc: String) // No need for isV override def w(value: Boolean): ParamPair[Boolean] = super.w(value) } -/** Specialized version of [[Param[Array[T]]]] for Java. */ +/** Specialized version of [[Param[Array[String]]]] for Java. */ class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array[String] => Boolean) extends Param[Array[String]](parent, name, doc, isValid) { @@ -232,6 +232,19 @@ class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array def w(value: java.util.List[String]): ParamPair[Array[String]] = w(value.asScala.toArray) } +/** Specialized version of [[Param[Array[Double]]]] for Java. 
*/ +class DoubleArrayParam(parent: Params, name: String, doc: String, isValid: Array[Double] => Boolean) + extends Param[Array[Double]](parent, name, doc, isValid) { + + def this(parent: Params, name: String, doc: String) = + this(parent, name, doc, ParamValidators.alwaysTrue) + + override def w(value: Array[Double]): ParamPair[Array[Double]] = super.w(value) + + /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */ + def w(value: java.util.List[Double]): ParamPair[Array[Double]] = w(value.asScala.toArray) +} + /** * A param amd its value. */ diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index f35bc1463d51b..30e1fd4922d0a 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -83,6 +83,83 @@ def getThreshold(self): return self.getOrDefault(self.threshold) +@inherit_doc +class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + Maps a column of continuous features to a column of feature buckets. + + >>> df = sqlContext.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"]) + >>> bucketizer = Bucketizer(splits=[-float("inf"), 0.5, 1.4, float("inf")], + ... inputCol="values", outputCol="buckets") + >>> bucketed = bucketizer.transform(df).collect() + >>> bucketed[0].buckets + 0.0 + >>> bucketed[1].buckets + 0.0 + >>> bucketed[2].buckets + 1.0 + >>> bucketed[3].buckets + 2.0 + >>> bucketizer.setParams(outputCol="b").transform(df).head().b + 0.0 + """ + + _java_class = "org.apache.spark.ml.feature.Bucketizer" + # a placeholder to make it appear in the generated doc + splits = \ + Param(Params._dummy(), "splits", + "Split points for mapping continuous features into buckets. With n+1 splits, " + + "there are n buckets. A bucket defined by splits x,y holds values in the " + + "range [x,y) except the last bucket, which also includes y. The splits " + + "should be strictly increasing. Values at -inf, inf must be explicitly " + + "provided to cover all Double values; otherwise, values outside the splits " + + "specified will be treated as errors.") + + @keyword_only + def __init__(self, splits=None, inputCol=None, outputCol=None): + """ + __init__(self, splits=None, inputCol=None, outputCol=None) + """ + super(Bucketizer, self).__init__() + #: param for Splitting points for mapping continuous features into buckets. With n+1 splits, + # there are n buckets. A bucket defined by splits x,y holds values in the range [x,y) + # except the last bucket, which also includes y. The splits should be strictly increasing. + # Values at -inf, inf must be explicitly provided to cover all Double values; otherwise, + # values outside the splits specified will be treated as errors. + self.splits = \ + Param(self, "splits", + "Split points for mapping continuous features into buckets. With n+1 splits, " + + "there are n buckets. A bucket defined by splits x,y holds values in the " + + "range [x,y) except the last bucket, which also includes y. The splits " + + "should be strictly increasing. Values at -inf, inf must be explicitly " + + "provided to cover all Double values; otherwise, values outside the splits " + + "specified will be treated as errors.") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, splits=None, inputCol=None, outputCol=None): + """ + setParams(self, splits=None, inputCol=None, outputCol=None) + Sets params for this Bucketizer. 
+ """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setSplits(self, value): + """ + Sets the value of :py:attr:`splits`. + """ + self.paramMap[self.splits] = value + return self + + def getSplits(self): + """ + Gets the value of threshold or its default value. + """ + return self.getOrDefault(self.splits) + + @inherit_doc class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): """ From 61e05fc58e1245de871c409b60951745b5db3420 Mon Sep 17 00:00:00 2001 From: leahmcguire <lmcguire@salesforce.com> Date: Wed, 13 May 2015 14:13:19 -0700 Subject: [PATCH 148/320] [SPARK-7545] [MLLIB] Added check in Bernoulli Naive Bayes to make sure that both training and predict features have values of 0 or 1 Author: leahmcguire <lmcguire@salesforce.com> Closes #6073 from leahmcguire/binaryCheckNB and squashes the following commits: b8442c2 [leahmcguire] changed to if else for value checks 911bf83 [leahmcguire] undid reformat 4eedf1e [leahmcguire] moved bernoulli check 9ee9e84 [leahmcguire] fixed style error 3f3b32c [leahmcguire] fixed zero one check so only called in combiner 831fd27 [leahmcguire] got test working f44bb3c [leahmcguire] removed changes from CV branch 67253f0 [leahmcguire] added check to bernoulli to ensure feature values are zero or one f191c71 [leahmcguire] fixed name 58d060b [leahmcguire] changed param name and test according to comments 04f0d3c [leahmcguire] Added stats from cross validation as a val in the cross validation model to save them for user access --- .../mllib/classification/NaiveBayes.scala | 28 ++++++++++++++-- .../classification/NaiveBayesSuite.scala | 33 +++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index c9b3ff0172e2e..b381dc2cb0140 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -87,12 +87,17 @@ class NaiveBayesModel private[mllib] ( } override def predict(testData: Vector): Double = { + val brzData = testData.toBreeze modelType match { case "Multinomial" => - labels (brzArgmax (brzPi + brzTheta * testData.toBreeze) ) + labels (brzArgmax (brzPi + brzTheta * brzData) ) case "Bernoulli" => + if (!brzData.forall(v => v == 0.0 || v == 1.0)) { + throw new SparkException( + s"Bernoulli Naive Bayes requires 0 or 1 feature values but found $testData.") + } labels (brzArgmax (brzPi + - (brzTheta - brzNegTheta.get) * testData.toBreeze + brzNegThetaSum.get)) + (brzTheta - brzNegTheta.get) * brzData + brzNegThetaSum.get)) case _ => // This should never happen. throw new UnknownError(s"NaiveBayesModel was created with an unknown ModelType: $modelType") @@ -293,12 +298,29 @@ class NaiveBayes private ( } } + val requireZeroOneBernoulliValues: Vector => Unit = (v: Vector) => { + val values = v match { + case SparseVector(size, indices, values) => + values + case DenseVector(values) => + values + } + if (!values.forall(v => v == 0.0 || v == 1.0)) { + throw new SparkException( + s"Bernoulli Naive Bayes requires 0 or 1 feature values but found $v.") + } + } + // Aggregates term frequencies per label. // TODO: Calling combineByKey and collect creates two stages, we can implement something // TODO: similar to reduceByKeyLocally to save one stage. 
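// The aggregation below produces, per label, a pair of (document count, elementwise sum of
// feature vectors). For the "Bernoulli" model type, createCombiner also routes vectors
// through requireZeroOneBernoulliValues, so training data containing a feature value other
// than 0.0 or 1.0 fails with a SparkException instead of silently producing a bogus model;
// predict() above enforces the same 0/1 requirement on test vectors.
//
// For context on the Bernoulli scoring in predict() (the standard Bernoulli NB identity,
// assuming theta / negTheta hold log(p) and log(1 - p) as in NaiveBayesModel):
//   sum_i [ x_i * log(p_i) + (1 - x_i) * log(1 - p_i) ]
//     = (log(p) - log(1 - p)) . x + sum_i log(1 - p_i)
// which is why the score is computed as pi + (theta - negTheta) * x + negThetaSum.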
val aggregated = data.map(p => (p.label, p.features)).combineByKey[(Long, BDV[Double])]( createCombiner = (v: Vector) => { - requireNonnegativeValues(v) + if (modelType == "Bernoulli") { + requireZeroOneBernoulliValues(v) + } else { + requireNonnegativeValues(v) + } (1L, v.toBreeze.toDenseVector) }, mergeValue = (c: (Long, BDV[Double]), v: Vector) => { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala index ea89b17b7c08f..40a79a1f19bd9 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala @@ -208,6 +208,39 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { } } + test("detect non zero or one values in Bernoulli") { + val badTrain = Seq( + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(0.0, Vectors.dense(2.0)), + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(1.0, Vectors.dense(0.0))) + + intercept[SparkException] { + NaiveBayes.train(sc.makeRDD(badTrain, 2), 1.0, "Bernoulli") + } + + val okTrain = Seq( + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(0.0, Vectors.dense(0.0)), + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(0.0, Vectors.dense(0.0)), + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(1.0, Vectors.dense(1.0)) + ) + + val badPredict = Seq( + Vectors.dense(1.0), + Vectors.dense(2.0), + Vectors.dense(1.0), + Vectors.dense(0.0)) + + val model = NaiveBayes.train(sc.makeRDD(okTrain, 2), 1.0, "Bernoulli") + intercept[SparkException] { + model.predict(sc.makeRDD(badPredict, 2)).collect() + } + } + test("model save/load: 2.0 to 2.0") { val tempDir = Utils.createTempDir() val path = tempDir.toURI.toString From df2fb1305aba6781017b0973b0965b664f835e31 Mon Sep 17 00:00:00 2001 From: Burak Yavuz <brkyvz@gmail.com> Date: Wed, 13 May 2015 15:13:09 -0700 Subject: [PATCH 149/320] [SPARK-7382] [MLLIB] Feature Parity in PySpark for ml.classification The missing pieces in ml.classification for Python! 
cc mengxr Author: Burak Yavuz <brkyvz@gmail.com> Closes #6106 from brkyvz/ml-class and squashes the following commits: dd78237 [Burak Yavuz] fix style 1048e29 [Burak Yavuz] ready for PR --- python/pyspark/ml/classification.py | 478 +++++++++++++++++- .../ml/param/_shared_params_code_gen.py | 4 + python/pyspark/ml/param/shared.py | 29 ++ 3 files changed, 501 insertions(+), 10 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 8a009c4ac721f..96d29058a3781 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -17,17 +17,19 @@ from pyspark.ml.util import keyword_only from pyspark.ml.wrapper import JavaEstimator, JavaModel -from pyspark.ml.param.shared import HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,\ - HasRegParam +from pyspark.ml.param.shared import * +from pyspark.ml.regression import RandomForestParams from pyspark.mllib.common import inherit_doc -__all__ = ['LogisticRegression', 'LogisticRegressionModel'] +__all__ = ['LogisticRegression', 'LogisticRegressionModel', 'DecisionTreeClassifier', + 'DecisionTreeClassificationModel', 'GBTClassifier', 'GBTClassificationModel', + 'RandomForestClassifier', 'RandomForestClassificationModel'] @inherit_doc class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, - HasRegParam): + HasRegParam, HasTol, HasProbabilityCol): """ Logistic regression. @@ -50,25 +52,49 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti TypeError: Method setParams forces keyword arguments. """ _java_class = "org.apache.spark.ml.classification.LogisticRegression" + # a placeholder to make it appear in the generated doc + elasticNetParam = \ + Param(Params._dummy(), "elasticNetParam", + "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " + + "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.") + fitIntercept = Param(Params._dummy(), "fitIntercept", "whether to fit an intercept term.") + threshold = Param(Params._dummy(), "threshold", + "threshold in binary classification prediction, in range [0, 1].") @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, regParam=0.1): + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + threshold=0.5, probabilityCol="probability"): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, regParam=0.1) + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + threshold=0.5, probabilityCol="probability") """ super(LogisticRegression, self).__init__() - self._setDefault(maxIter=100, regParam=0.1) + #: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty + # is an L2 penalty. For alpha = 1, it is an L1 penalty. + self.elasticNetParam = \ + Param(self, "elasticNetParam", + "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " + + "is an L2 penalty. For alpha = 1, it is an L1 penalty.") + #: param for whether to fit an intercept term. + self.fitIntercept = Param(self, "fitIntercept", "whether to fit an intercept term.") + #: param for threshold in binary classification prediction, in range [0, 1]. 
+ self.threshold = Param(self, "threshold", + "threshold in binary classification prediction, in range [0, 1].") + self._setDefault(maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1E-6, + fitIntercept=True, threshold=0.5) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @keyword_only def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, regParam=0.1): + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + threshold=0.5, probabilityCol="probability"): """ - setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, regParam=0.1) + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + threshold=0.5, probabilityCol="probability") Sets params for logistic regression. """ kwargs = self.setParams._input_kwargs @@ -77,6 +103,45 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre def _create_model(self, java_model): return LogisticRegressionModel(java_model) + def setElasticNetParam(self, value): + """ + Sets the value of :py:attr:`elasticNetParam`. + """ + self.paramMap[self.elasticNetParam] = value + return self + + def getElasticNetParam(self): + """ + Gets the value of elasticNetParam or its default value. + """ + return self.getOrDefault(self.elasticNetParam) + + def setFitIntercept(self, value): + """ + Sets the value of :py:attr:`fitIntercept`. + """ + self.paramMap[self.fitIntercept] = value + return self + + def getFitIntercept(self): + """ + Gets the value of fitIntercept or its default value. + """ + return self.getOrDefault(self.fitIntercept) + + def setThreshold(self, value): + """ + Sets the value of :py:attr:`threshold`. + """ + self.paramMap[self.threshold] = value + return self + + def getThreshold(self): + """ + Gets the value of threshold or its default value. + """ + return self.getOrDefault(self.threshold) + class LogisticRegressionModel(JavaModel): """ @@ -84,6 +149,399 @@ class LogisticRegressionModel(JavaModel): """ +class TreeClassifierParams(object): + """ + Private class to track supported impurity measures. + """ + supportedImpurities = ["entropy", "gini"] + + +class GBTParams(object): + """ + Private class to track supported GBT params. + """ + supportedLossTypes = ["logistic"] + + +@inherit_doc +class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree` + learning algorithm for classification. + It supports both binary and multiclass labels, as well as both continuous and categorical + features. + + >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.feature import StringIndexer + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... 
(0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> si_model = stringIndexer.fit(df) + >>> td = si_model.transform(df) + >>> dt = DecisionTreeClassifier(maxDepth=2, labelCol="indexed") + >>> model = dt.fit(td) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + _java_class = "org.apache.spark.ml.classification.DecisionTreeClassifier" + # a placeholder to make it appear in the generated doc + impurity = Param(Params._dummy(), "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeClassifierParams.supportedImpurities)) + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini"): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini") + """ + super(DecisionTreeClassifier, self).__init__() + #: param for Criterion used for information gain calculation (case-insensitive). + self.impurity = \ + Param(self, "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeClassifierParams.supportedImpurities)) + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="gini") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="gini"): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="gini") + Sets params for the DecisionTreeClassifier. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return DecisionTreeClassificationModel(java_model) + + def setImpurity(self, value): + """ + Sets the value of :py:attr:`impurity`. + """ + self.paramMap[self.impurity] = value + return self + + def getImpurity(self): + """ + Gets the value of impurity or its default value. + """ + return self.getOrDefault(self.impurity) + + +class DecisionTreeClassificationModel(JavaModel): + """ + Model fitted by DecisionTreeClassifier. + """ + + +@inherit_doc +class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Random_forest Random Forest` + learning algorithm for classification. + It supports both binary and multiclass labels, as well as both continuous and categorical + features. 
+ + >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.feature import StringIndexer + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> si_model = stringIndexer.fit(df) + >>> td = si_model.transform(df) + >>> rf = RandomForestClassifier(numTrees=2, maxDepth=2, labelCol="indexed") + >>> model = rf.fit(td) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + _java_class = "org.apache.spark.ml.classification.RandomForestClassifier" + # a placeholder to make it appear in the generated doc + impurity = Param(Params._dummy(), "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeClassifierParams.supportedImpurities)) + subsamplingRate = Param(Params._dummy(), "subsamplingRate", + "Fraction of the training data used for learning each decision tree, " + + "in range (0, 1].") + numTrees = Param(Params._dummy(), "numTrees", "Number of trees to train (>= 1)") + featureSubsetStrategy = \ + Param(Params._dummy(), "featureSubsetStrategy", + "The number of features to consider for splits at each tree node. Supported " + + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", + numTrees=20, featureSubsetStrategy="auto", seed=42): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", + numTrees=20, featureSubsetStrategy="auto", seed=42) + """ + super(RandomForestClassifier, self).__init__() + #: param for Criterion used for information gain calculation (case-insensitive). + self.impurity = \ + Param(self, "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeClassifierParams.supportedImpurities)) + #: param for Fraction of the training data used for learning each decision tree, + # in range (0, 1] + self.subsamplingRate = Param(self, "subsamplingRate", + "Fraction of the training data used for learning each " + + "decision tree, in range (0, 1].") + #: param for Number of trees to train (>= 1) + self.numTrees = Param(self, "numTrees", "Number of trees to train (>= 1)") + #: param for The number of features to consider for splits at each tree node + self.featureSubsetStrategy = \ + Param(self, "featureSubsetStrategy", + "The number of features to consider for splits at each tree node. 
Supported " + + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + impurity="gini", numTrees=20, featureSubsetStrategy="auto") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + impurity="gini", numTrees=20, featureSubsetStrategy="auto"): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + impurity="gini", numTrees=20, featureSubsetStrategy="auto") + Sets params for linear classification. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return RandomForestClassificationModel(java_model) + + def setImpurity(self, value): + """ + Sets the value of :py:attr:`impurity`. + """ + self.paramMap[self.impurity] = value + return self + + def getImpurity(self): + """ + Gets the value of impurity or its default value. + """ + return self.getOrDefault(self.impurity) + + def setSubsamplingRate(self, value): + """ + Sets the value of :py:attr:`subsamplingRate`. + """ + self.paramMap[self.subsamplingRate] = value + return self + + def getSubsamplingRate(self): + """ + Gets the value of subsamplingRate or its default value. + """ + return self.getOrDefault(self.subsamplingRate) + + def setNumTrees(self, value): + """ + Sets the value of :py:attr:`numTrees`. + """ + self.paramMap[self.numTrees] = value + return self + + def getNumTrees(self): + """ + Gets the value of numTrees or its default value. + """ + return self.getOrDefault(self.numTrees) + + def setFeatureSubsetStrategy(self, value): + """ + Sets the value of :py:attr:`featureSubsetStrategy`. + """ + self.paramMap[self.featureSubsetStrategy] = value + return self + + def getFeatureSubsetStrategy(self): + """ + Gets the value of featureSubsetStrategy or its default value. + """ + return self.getOrDefault(self.featureSubsetStrategy) + + +class RandomForestClassificationModel(JavaModel): + """ + Model fitted by RandomForestClassifier. + """ + + +@inherit_doc +class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)` + learning algorithm for classification. + It supports binary labels, as well as both continuous and categorical features. + Note: Multiclass labels are not currently supported. + + >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.feature import StringIndexer + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... 
(0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> si_model = stringIndexer.fit(df) + >>> td = si_model.transform(df) + >>> gbt = GBTClassifier(maxIter=5, maxDepth=2, labelCol="indexed") + >>> model = gbt.fit(td) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + _java_class = "org.apache.spark.ml.classification.GBTClassifier" + # a placeholder to make it appear in the generated doc + lossType = Param(Params._dummy(), "lossType", + "Loss function which GBT tries to minimize (case-insensitive). " + + "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) + subsamplingRate = Param(Params._dummy(), "subsamplingRate", + "Fraction of the training data used for learning each decision tree, " + + "in range (0, 1].") + stepSize = Param(Params._dummy(), "stepSize", + "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the " + + "contribution of each estimator") + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", + maxIter=20, stepSize=0.1): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", + maxIter=20, stepSize=0.1) + """ + super(GBTClassifier, self).__init__() + #: param for Loss function which GBT tries to minimize (case-insensitive). + self.lossType = Param(self, "lossType", + "Loss function which GBT tries to minimize (case-insensitive). " + + "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) + #: Fraction of the training data used for learning each decision tree, in range (0, 1]. + self.subsamplingRate = Param(self, "subsamplingRate", + "Fraction of the training data used for learning each " + + "decision tree, in range (0, 1].") + #: Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the contribution of + # each estimator + self.stepSize = Param(self, "stepSize", + "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " + + "the contribution of each estimator") + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="logistic", maxIter=20, stepSize=0.1) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="logistic", maxIter=20, stepSize=0.1): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="logistic", maxIter=20, stepSize=0.1) + Sets params for Gradient Boosted Tree Classification. 
+ """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return GBTClassificationModel(java_model) + + def setLossType(self, value): + """ + Sets the value of :py:attr:`lossType`. + """ + self.paramMap[self.lossType] = value + return self + + def getLossType(self): + """ + Gets the value of lossType or its default value. + """ + return self.getOrDefault(self.lossType) + + def setSubsamplingRate(self, value): + """ + Sets the value of :py:attr:`subsamplingRate`. + """ + self.paramMap[self.subsamplingRate] = value + return self + + def getSubsamplingRate(self): + """ + Gets the value of subsamplingRate or its default value. + """ + return self.getOrDefault(self.subsamplingRate) + + def setStepSize(self, value): + """ + Sets the value of :py:attr:`stepSize`. + """ + self.paramMap[self.stepSize] = value + return self + + def getStepSize(self): + """ + Gets the value of stepSize or its default value. + """ + return self.getOrDefault(self.stepSize) + + +class GBTClassificationModel(JavaModel): + """ + Model fitted by GBTClassifier. + """ + + if __name__ == "__main__": import doctest from pyspark.context import SparkContext diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 4a5cc6e64f023..6fa9b8c2cf367 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -109,6 +109,9 @@ def get$Name(self): ("featuresCol", "features column name", "'features'"), ("labelCol", "label column name", "'label'"), ("predictionCol", "prediction column name", "'prediction'"), + ("probabilityCol", "Column name for predicted class conditional probabilities. " + + "Note: Not all models output well-calibrated probability estimates! These probabilities " + + "should be treated as confidences, not precise probabilities.", "'probability'"), ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", "'rawPrediction'"), ("inputCol", "input column name", None), ("inputCols", "input column names", None), @@ -156,6 +159,7 @@ def __init__(self): for name, doc in decisionTreeParams: variable = paramTemplate.replace("$name", name).replace("$doc", doc) dummyPlaceholders += variable.replace("$owner", "Params._dummy()") + "\n " + realParams += "#: param for " + doc + "\n " realParams += "self." + variable.replace("$owner", "self") + "\n " dtParamMethods += _gen_param_code(name, doc, None) + "\n" code.append(decisionTreeCode.replace("$dummyPlaceHolders", dummyPlaceholders) diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 779cabe853f8e..b116f05a068d3 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -165,6 +165,35 @@ def getPredictionCol(self): return self.getOrDefault(self.predictionCol) +class HasProbabilityCol(Params): + """ + Mixin for param probabilityCol: Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.. + """ + + # a placeholder to make it appear in the generated doc + probabilityCol = Param(Params._dummy(), "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! 
These probabilities should be treated as confidences, not precise probabilities.") + + def __init__(self): + super(HasProbabilityCol, self).__init__() + #: param for Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities. + self.probabilityCol = Param(self, "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.") + if 'probability' is not None: + self._setDefault(probabilityCol='probability') + + def setProbabilityCol(self, value): + """ + Sets the value of :py:attr:`probabilityCol`. + """ + self.paramMap[self.probabilityCol] = value + return self + + def getProbabilityCol(self): + """ + Gets the value of probabilityCol or its default value. + """ + return self.getOrDefault(self.probabilityCol) + + class HasRawPredictionCol(Params): """ Mixin for param rawPredictionCol: raw prediction (a.k.a. confidence) column name. From 59250fe51486908f9e3f3d9ef10aadbcb9b4d62d Mon Sep 17 00:00:00 2001 From: scwf <wangfei1@huawei.com> Date: Wed, 13 May 2015 16:13:48 -0700 Subject: [PATCH 150/320] [SPARK-7303] [SQL] push down project if possible when the child is sort Optimize the case of `project(_, sort)` , a example is: `select key from (select * from testData order by key) t` before this PR: ``` == Parsed Logical Plan == 'Project ['key] 'Subquery t 'Sort ['key ASC], true 'Project [*] 'UnresolvedRelation [testData], None == Analyzed Logical Plan == Project [key#0] Subquery t Sort [key#0 ASC], true Project [key#0,value#1] Subquery testData LogicalRDD [key#0,value#1], MapPartitionsRDD[1] == Optimized Logical Plan == Project [key#0] Sort [key#0 ASC], true LogicalRDD [key#0,value#1], MapPartitionsRDD[1] == Physical Plan == Project [key#0] Sort [key#0 ASC], true Exchange (RangePartitioning [key#0 ASC], 5), [] PhysicalRDD [key#0,value#1], MapPartitionsRDD[1] ``` after this PR ``` == Parsed Logical Plan == 'Project ['key] 'Subquery t 'Sort ['key ASC], true 'Project [*] 'UnresolvedRelation [testData], None == Analyzed Logical Plan == Project [key#0] Subquery t Sort [key#0 ASC], true Project [key#0,value#1] Subquery testData LogicalRDD [key#0,value#1], MapPartitionsRDD[1] == Optimized Logical Plan == Sort [key#0 ASC], true Project [key#0] LogicalRDD [key#0,value#1], MapPartitionsRDD[1] == Physical Plan == Sort [key#0 ASC], true Exchange (RangePartitioning [key#0 ASC], 5), [] Project [key#0] PhysicalRDD [key#0,value#1], MapPartitionsRDD[1] ``` with this rule we will first do column pruning on the table and then do sorting. 
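To make the rewrite concrete, here is a minimal, self-contained Scala sketch of the same idea. The `Relation`/`Project`/`Sort` case classes below are illustrative stand-ins, not Spark's Catalyst `LogicalPlan` classes; the guard mirrors the rule's check that the sort only references columns the projection keeps (`s.references.subsetOf(p.outputSet)`).

```scala
// Toy plan algebra -- illustrative only, not Catalyst's classes.
sealed trait Plan
case class Relation(columns: Set[String]) extends Plan
case class Project(columns: Set[String], child: Plan) extends Plan
case class Sort(sortKeys: Set[String], child: Plan) extends Plan

object PushProjectPastSortDemo extends App {
  // Push a Project below a Sort when every sort key survives the projection.
  def pushProjectPastSort(plan: Plan): Plan = plan match {
    case Project(cols, Sort(keys, grandChild)) if keys.subsetOf(cols) =>
      Sort(keys, Project(cols, grandChild))
    case other => other
  }

  // select key from (select * from testData order by key) t
  val valid = Project(Set("key"), Sort(Set("key"), Relation(Set("key", "value"))))
  println(pushProjectPastSort(valid))    // Sort over a pruned Project

  // Sorting on a column the projection drops must not be rewritten.
  val invalid = Project(Set("value"), Sort(Set("key"), Relation(Set("key", "value"))))
  println(pushProjectPastSort(invalid))  // left unchanged
}
```

The pattern only fires for a Project whose immediate child is a Sort, which is exactly the shape produced by the subquery above; anything else is left alone, matching the "push down invalid" case exercised in the test below.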
Author: scwf <wangfei1@huawei.com> This patch had conflicts when merged, resolved by Committer: Michael Armbrust <michael@databricks.com> Closes #5838 from scwf/pruning and squashes the following commits: b00d833 [scwf] address michael's comment e230155 [scwf] fix tests failure b09b895 [scwf] improve column pruning --- .../sql/catalyst/optimizer/Optimizer.scala | 5 +++ .../optimizer/FilterPushdownSuite.scala | 36 ++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b163707cc9925..c2818d957cc79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -156,6 +156,11 @@ object ColumnPruning extends Rule[LogicalPlan] { case Project(projectList, Limit(exp, child)) => Limit(exp, Project(projectList, child)) + // push down project if possible when the child is sort + case p @ Project(projectList, s @ Sort(_, _, grandChild)) + if s.references.subsetOf(p.outputSet) => + s.copy(child = Project(projectList, grandChild)) + // Eliminate no-op Projects case Project(projectList, child) if child.output == projectList => child } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 0c428f7231b8e..be33cb9bb8eaa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries -import org.apache.spark.sql.catalyst.expressions.{Count, Explode} +import org.apache.spark.sql.catalyst.expressions.{SortOrder, Ascending, Count, Explode} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.{LeftSemi, PlanTest, LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.rules._ @@ -542,4 +542,38 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, originalQuery) } + + test("push down project past sort") { + val x = testRelation.subquery('x) + + // push down valid + val originalQuery = { + x.select('a, 'b) + .sortBy(SortOrder('a, Ascending)) + .select('a) + } + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = + x.select('a) + .sortBy(SortOrder('a, Ascending)).analyze + + comparePlans(optimized, analysis.EliminateSubQueries(correctAnswer)) + + // push down invalid + val originalQuery1 = { + x.select('a, 'b) + .sortBy(SortOrder('a, Ascending)) + .select('b) + } + + val optimized1 = Optimize.execute(originalQuery1.analyze) + val correctAnswer1 = + x.select('a, 'b) + .sortBy(SortOrder('a, Ascending)) + .select('b).analyze + + comparePlans(optimized1, analysis.EliminateSubQueries(correctAnswer1)) + + } } From e683182c3e6347afdac0e5658487f80e5e054ef4 Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Wed, 13 May 2015 16:15:31 -0700 Subject: [PATCH 151/320] [SQL] Move some classes into packages that are more appropriate. 
JavaTypeInference into catalyst types.DateUtils into catalyst CacheManager into execution DefaultParserDialect into catalyst Author: Reynold Xin <rxin@databricks.com> Closes #6108 from rxin/sql-rename and squashes the following commits: 3fc9613 [Reynold Xin] Fixed import ordering. 83d9ff4 [Reynold Xin] Fixed codegen tests. e271e86 [Reynold Xin] mima f4e24a6 [Reynold Xin] [SQL] Move some classes into packages that are more appropriate. --- project/MimaExcludes.scala | 5 ++- .../sql/catalyst/CatalystTypeConverters.scala | 1 + .../sql/catalyst}/JavaTypeInference.scala | 4 +- .../spark/sql/catalyst/ParserDialect.scala | 36 +++++++++++++++++ .../spark/sql/catalyst/expressions/Cast.scala | 1 + .../expressions/codegen/CodeGenerator.scala | 2 +- .../sql/catalyst/expressions/literals.scala | 1 + .../{types => catalyst/util}/DateUtils.scala | 2 +- .../apache/spark/sql/types/UTF8String.scala | 17 +++++--- .../ExpressionEvaluationSuite.scala | 1 + .../scala/org/apache/spark/sql/Column.scala | 2 + .../org/apache/spark/sql/SQLContext.scala | 40 +------------------ .../sql/{ => execution}/CacheManager.scala | 5 ++- .../spark/sql/execution/pythonUdfs.scala | 5 ++- .../org/apache/spark/sql/functions.scala | 1 + .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 2 +- .../apache/spark/sql/json/JacksonParser.scala | 1 + .../org/apache/spark/sql/json/JsonRDD.scala | 1 + .../org/apache/spark/sql/SQLQuerySuite.scala | 2 +- .../org/apache/spark/sql/json/JsonSuite.scala | 1 + .../spark/sql/parquet/ParquetIOSuite.scala | 1 + .../spark/sql/hive/HiveInspectors.scala | 1 + .../apache/spark/sql/hive/TableReader.scala | 2 +- .../sql/hive/execution/SQLQuerySuite.scala | 3 +- 24 files changed, 80 insertions(+), 57 deletions(-) rename sql/{core/src/main/scala/org/apache/spark/sql => catalyst/src/main/scala/org/apache/spark/sql/catalyst}/JavaTypeInference.scala (99%) rename sql/catalyst/src/main/scala/org/apache/spark/sql/{types => catalyst/util}/DateUtils.scala (98%) rename sql/core/src/main/scala/org/apache/spark/sql/{ => execution}/CacheManager.scala (97%) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index f31f0e554eee9..fba7290dcb0b5 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -123,7 +123,10 @@ object MimaExcludes { ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.ParquetTestData$"), ProblemFilters.exclude[MissingClassProblem]( - "org.apache.spark.sql.parquet.TestGroupWriteSupport") + "org.apache.spark.sql.parquet.TestGroupWriteSupport"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CachedData"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CachedData$"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CacheManager") ) ++ Seq( // SPARK-7530 Added StreamingContext.getState() ProblemFilters.exclude[MissingMethodProblem]( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index a13e2f36a1a1f..75a493b248f6e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -23,6 +23,7 @@ import java.util.{Map => JavaMap} import scala.collection.mutable.HashMap import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ /** diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala similarity index 99% rename from sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 1ec874f79617c..625c8d3a62125 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.catalyst import java.beans.Introspector import java.lang.{Iterable => JIterable} @@ -24,10 +24,8 @@ import java.util.{Iterator => JIterator, Map => JMap} import scala.language.existentials import com.google.common.reflect.TypeToken - import org.apache.spark.sql.types._ - /** * Type-inference utilities for POJOs and Java collections. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala index 05a92b06f9fd9..554fb4eb25eb1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala @@ -31,3 +31,39 @@ abstract class ParserDialect { // this is the main function that will be implemented by sql parser. def parse(sqlText: String): LogicalPlan } + +/** + * Currently we support the default dialect named "sql", associated with the class + * [[DefaultParserDialect]] + * + * And we can also provide custom SQL Dialect, for example in Spark SQL CLI: + * {{{ + *-- switch to "hiveql" dialect + * spark-sql>SET spark.sql.dialect=hiveql; + * spark-sql>SELECT * FROM src LIMIT 1; + * + *-- switch to "sql" dialect + * spark-sql>SET spark.sql.dialect=sql; + * spark-sql>SELECT * FROM src LIMIT 1; + * + *-- register the new SQL dialect + * spark-sql> SET spark.sql.dialect=com.xxx.xxx.SQL99Dialect; + * spark-sql> SELECT * FROM src LIMIT 1; + * + *-- register the non-exist SQL dialect + * spark-sql> SET spark.sql.dialect=NotExistedClass; + * spark-sql> SELECT * FROM src LIMIT 1; + * + *-- Exception will be thrown and switch to dialect + *-- "sql" (for SQLContext) or + *-- "hiveql" (for HiveContext) + * }}} + */ +private[spark] class DefaultParserDialect extends ParserDialect { + @transient + protected val sqlParser = new SqlParser + + override def parse(sqlText: String): LogicalPlan = { + sqlParser.parse(sqlText) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index adf941ab2a45f..d8cf2b2e32435 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp} import java.text.{DateFormat, SimpleDateFormat} import org.apache.spark.Logging +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ /** Cast the child expression to the target data type. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d17af0e7ff87e..ecb4c4b68f904 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -250,7 +250,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin case Cast(child @ DateType(), StringType) => child.castOrNull(c => q"""org.apache.spark.sql.types.UTF8String( - org.apache.spark.sql.types.DateUtils.toString($c))""", + org.apache.spark.sql.catalyst.util.DateUtils.toString($c))""", StringType) case Cast(child @ NumericType(), IntegerType) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 18cba4cc46707..5f8c7354aede1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import org.apache.spark.sql.catalyst.CatalystTypeConverters +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ object Literal { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateUtils.scala similarity index 98% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateUtils.scala index d36a49159b87f..3f92be4a55d7d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateUtils.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.types +package org.apache.spark.sql.catalyst.util import java.sql.Date import java.text.SimpleDateFormat diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala index fc02ba6c9c43e..bc9c37bf2d5d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala @@ -19,15 +19,18 @@ package org.apache.spark.sql.types import java.util.Arrays +import org.apache.spark.annotation.DeveloperApi + /** - * A UTF-8 String, as internal representation of StringType in SparkSQL + * :: DeveloperApi :: + * A UTF-8 String, as internal representation of StringType in SparkSQL * - * A String encoded in UTF-8 as an Array[Byte], which can be used for comparison, - * search, see http://en.wikipedia.org/wiki/UTF-8 for details. + * A String encoded in UTF-8 as an Array[Byte], which can be used for comparison, + * search, see http://en.wikipedia.org/wiki/UTF-8 for details. * - * Note: This is not designed for general use cases, should not be used outside SQL. + * Note: This is not designed for general use cases, should not be used outside SQL. 
*/ - +@DeveloperApi final class UTF8String extends Ordered[UTF8String] with Serializable { private[this] var bytes: Array[Byte] = _ @@ -180,6 +183,10 @@ final class UTF8String extends Ordered[UTF8String] with Serializable { } } +/** + * :: DeveloperApi :: + */ +@DeveloperApi object UTF8String { // number of tailing bytes in a UTF8 sequence for a code point // see http://en.wikipedia.org/wiki/UTF-8, 192-256 of Byte 1 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index 04fd261d16aa3..5c4a1527c27c9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.mathfuncs._ +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 42f5bcda49cfb..8bf1320ccb71d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -346,6 +346,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def when(condition: Column, value: Any):Column = this.expr match { case CaseWhen(branches: Seq[Expression]) => @@ -374,6 +375,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.4.0 */ def otherwise(value: Any):Column = this.expr match { case CaseWhen(branches: Seq[Expression]) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 0a148c7cd2d3b..521f3dc821795 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -33,6 +33,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.errors.DialectException @@ -40,7 +41,6 @@ import org.apache.spark.sql.catalyst.optimizer.{DefaultOptimizer, Optimizer} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.ParserDialect -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, expressions} import org.apache.spark.sql.execution.{Filter, _} import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} import org.apache.spark.sql.json._ @@ -50,42 +50,6 @@ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils import org.apache.spark.{Partition, SparkContext} -/** - * Currently we support the default dialect named "sql", associated with the class - * 
[[DefaultParserDialect]] - * - * And we can also provide custom SQL Dialect, for example in Spark SQL CLI: - * {{{ - *-- switch to "hiveql" dialect - * spark-sql>SET spark.sql.dialect=hiveql; - * spark-sql>SELECT * FROM src LIMIT 1; - * - *-- switch to "sql" dialect - * spark-sql>SET spark.sql.dialect=sql; - * spark-sql>SELECT * FROM src LIMIT 1; - * - *-- register the new SQL dialect - * spark-sql> SET spark.sql.dialect=com.xxx.xxx.SQL99Dialect; - * spark-sql> SELECT * FROM src LIMIT 1; - * - *-- register the non-exist SQL dialect - * spark-sql> SET spark.sql.dialect=NotExistedClass; - * spark-sql> SELECT * FROM src LIMIT 1; - * - *-- Exception will be thrown and switch to dialect - *-- "sql" (for SQLContext) or - *-- "hiveql" (for HiveContext) - * }}} - */ -private[spark] class DefaultParserDialect extends ParserDialect { - @transient - protected val sqlParser = new catalyst.SqlParser - - override def parse(sqlText: String): LogicalPlan = { - sqlParser.parse(sqlText) - } -} - /** * The entry point for working with structured data (rows and columns) in Spark. Allows the * creation of [[DataFrame]] objects as well as the execution of SQL queries. @@ -1276,7 +1240,7 @@ class SQLContext(@transient val sparkContext: SparkContext) val projectSet = AttributeSet(projectList.flatMap(_.references)) val filterSet = AttributeSet(filterPredicates.flatMap(_.references)) val filterCondition = - prunePushedDownFilters(filterPredicates).reduceLeftOption(expressions.And) + prunePushedDownFilters(filterPredicates).reduceLeftOption(catalyst.expressions.And) // Right now we still use a projection even if the only evaluation is applying an alias // to a column. Since this is a no-op, it could be avoided. However, using this diff --git a/sql/core/src/main/scala/org/apache/spark/sql/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala similarity index 97% rename from sql/core/src/main/scala/org/apache/spark/sql/CacheManager.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 18584c2dcf797..5fcc48a67948b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -15,18 +15,19 @@ * limitations under the License. 
*/ -package org.apache.spark.sql +package org.apache.spark.sql.execution import java.util.concurrent.locks.ReentrantReadWriteLock import org.apache.spark.Logging import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.columnar.InMemoryRelation +import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK /** Holds a cached logical plan and its data */ -private case class CachedData(plan: LogicalPlan, cachedRepresentation: InMemoryRelation) +private[sql] case class CachedData(plan: LogicalPlan, cachedRepresentation: InMemoryRelation) /** * Provides support in a SQLContext for caching query results and automatically using these cached diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala index 3dbc3837950e0..65dd7ba020fa3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala @@ -19,20 +19,21 @@ package org.apache.spark.sql.execution import java.util.{List => JList, Map => JMap} -import org.apache.spark.rdd.RDD - import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ import net.razorvine.pickle.{Pickler, Unpickler} + import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.python.{PythonBroadcast, PythonRDD} import org.apache.spark.broadcast.Broadcast +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ import org.apache.spark.{Accumulator, Logging => SparkLogging} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 099e1d8f03272..4404ad8ad63a8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -438,6 +438,7 @@ object functions { * }}} * * @group normal_funcs + * @since 1.4.0 */ def when(condition: Column, value: Any): Column = { CaseWhen(Seq(condition.expr, lit(value).expr)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index a03ade3881f59..40483d3ec7701 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -25,9 +25,9 @@ import org.apache.commons.lang3.StringUtils import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Row, SpecificMutableRow} +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ import org.apache.spark.sql.sources._ -import org.apache.spark.util.Utils private[sql] object JDBCRDD extends Logging { /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala index a8e69ae61174f..81611513582a8 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala @@ -26,6 +26,7 @@ import com.fasterxml.jackson.core._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.json.JacksonUtils.nextUntil import org.apache.spark.sql.types._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala index f62973d5fcfab..4c32710a17bc7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala @@ -29,6 +29,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.ScalaReflection +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ import org.apache.spark.Logging diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ec0e76cde6f7c..8cdbe076cbd85 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -19,10 +19,10 @@ package org.apache.spark.sql import org.scalatest.BeforeAndAfterAll +import org.apache.spark.sql.catalyst.DefaultParserDialect import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.execution.GeneratedAggregate import org.apache.spark.sql.functions._ -import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.TestData._ import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext.{udf => _, _} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala index 263fafba930ce..b06e3385980f7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala @@ -24,6 +24,7 @@ import com.fasterxml.jackson.core.JsonFactory import org.scalactic.Tolerance._ import org.apache.spark.sql.TestData._ +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.functions._ import org.apache.spark.sql.json.InferSchema.compatibleType import org.apache.spark.sql.sources.LogicalRelation diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala index 7c371dbc7d3c9..008443df216aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala @@ -35,6 +35,7 @@ import parquet.schema.{MessageType, MessageTypeParser} import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext._ import org.apache.spark.sql.test.TestSQLContext.implicits._ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 74ae984f34866..7c7666f6e4b7c 100644 
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.serde2.{io => hiveIo} import org.apache.hadoop.{io => hadoopIo} import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types import org.apache.spark.sql.types._ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index b69312f0f8717..0b6f7a334a715 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -35,7 +35,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.Logging import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.DateUtils +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.util.Utils /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 1d6393a3fec85..eaa9d6aad1f31 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.hive.execution +import org.apache.spark.sql.catalyst.DefaultParserDialect import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries import org.apache.spark.sql.catalyst.errors.DialectException +import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SQLConf} import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ @@ -26,7 +28,6 @@ import org.apache.spark.sql.hive.{HiveQLDialect, HiveShim, MetastoreRelation} import org.apache.spark.sql.parquet.FSBasedParquetRelation import org.apache.spark.sql.sources.LogicalRelation import org.apache.spark.sql.types._ -import org.apache.spark.sql.{AnalysisException, DefaultParserDialect, QueryTest, Row, SQLConf} case class Nested1(f1: Nested2) case class Nested2(f2: Nested3) From f6e18388d993d99f768c6d547327e0720ec64224 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Wed, 13 May 2015 16:27:48 -0700 Subject: [PATCH 152/320] [SPARK-7608] Clean up old state in RDDOperationGraphListener This is necessary for streaming and long-running Spark applications. 
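As a rough illustration of the retention scheme introduced below, here is a small, self-contained Scala sketch; the class and field names are invented for the example and are not Spark's API. The idea is to keep ids in an insertion-ordered buffer next to the lookup map and, once a configured cap is reached, drop the oldest ~10% of entries so the maps stay bounded in long-running applications.

```scala
import scala.collection.mutable

// Illustrative sketch only -- not the actual RDDOperationGraphListener.
class BoundedGraphState(retained: Int) {
  private val ids = new mutable.ArrayBuffer[Int]          // insertion order
  private val idToGraph = new mutable.HashMap[Int, String]

  def record(id: Int, graph: String): Unit = {
    ids += id
    idToGraph(id) = graph
    // Same trimming policy as the listener: once the cap is hit,
    // evict the oldest 10% (at least one entry).
    if (ids.size >= retained) {
      val toRemove = math.max(retained / 10, 1)
      ids.take(toRemove).foreach(oldId => idToGraph.remove(oldId))
      ids.trimStart(toRemove)
    }
  }

  def size: Int = idToGraph.size
}

object BoundedGraphStateDemo extends App {
  val state = new BoundedGraphState(retained = 1000)
  (1 to 100000).foreach(i => state.record(i, s"graph-$i"))
  println(state.size)  // stays near the retention limit instead of growing to 100000
}
```

Tracking insertion order separately from the map is what makes eviction cheap: the buffer identifies the oldest ids without scanning the map, which is the same reason the listener keeps `jobIds` and `stageIds` buffers alongside its hash maps.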
zsxwing tdas Author: Andrew Or <andrew@databricks.com> Closes #6125 from andrewor14/viz-listener-leak and squashes the following commits: 8660949 [Andrew Or] Fix thing + add tests 33c0843 [Andrew Or] Clean up old job state --- .../ui/scope/RDDOperationGraphListener.scala | 30 +++++-- .../RDDOperationGraphListenerSuite.scala | 87 +++++++++++++++++++ 2 files changed, 108 insertions(+), 9 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala index 2884a49f31122..f0f7007d77a14 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala @@ -27,11 +27,16 @@ import org.apache.spark.ui.SparkUI * A SparkListener that constructs a DAG of RDD operations. */ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListener { - private val jobIdToStageIds = new mutable.HashMap[Int, Seq[Int]] - private val stageIdToGraph = new mutable.HashMap[Int, RDDOperationGraph] - private val stageIds = new mutable.ArrayBuffer[Int] + private[ui] val jobIdToStageIds = new mutable.HashMap[Int, Seq[Int]] + private[ui] val stageIdToGraph = new mutable.HashMap[Int, RDDOperationGraph] + + // Keep track of the order in which these are inserted so we can remove old ones + private[ui] val jobIds = new mutable.ArrayBuffer[Int] + private[ui] val stageIds = new mutable.ArrayBuffer[Int] // How many jobs or stages to retain graph metadata for + private val retainedJobs = + conf.getInt("spark.ui.retainedJobs", SparkUI.DEFAULT_RETAINED_JOBS) private val retainedStages = conf.getInt("spark.ui.retainedStages", SparkUI.DEFAULT_RETAINED_STAGES) @@ -50,15 +55,22 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen /** On job start, construct a RDDOperationGraph for each stage in the job for display later. 
*/ override def onJobStart(jobStart: SparkListenerJobStart): Unit = synchronized { val jobId = jobStart.jobId - val stageInfos = jobStart.stageInfos + jobIds += jobId + jobIdToStageIds(jobId) = jobStart.stageInfos.map(_.stageId).sorted - stageInfos.foreach { stageInfo => - stageIds += stageInfo.stageId - stageIdToGraph(stageInfo.stageId) = RDDOperationGraph.makeOperationGraph(stageInfo) + // Remove state for old jobs + if (jobIds.size >= retainedJobs) { + val toRemove = math.max(retainedJobs / 10, 1) + jobIds.take(toRemove).foreach { id => jobIdToStageIds.remove(id) } + jobIds.trimStart(toRemove) } - jobIdToStageIds(jobId) = stageInfos.map(_.stageId).sorted + } - // Remove graph metadata for old stages + /** Remove graph metadata for old stages */ + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = synchronized { + val stageInfo = stageSubmitted.stageInfo + stageIds += stageInfo.stageId + stageIdToGraph(stageInfo.stageId) = RDDOperationGraph.makeOperationGraph(stageInfo) if (stageIds.size >= retainedStages) { val toRemove = math.max(retainedStages / 10, 1) stageIds.take(toRemove).foreach { id => stageIdToGraph.remove(id) } diff --git a/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala new file mode 100644 index 0000000000000..619b38ac02676 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui.scope + +import org.scalatest.FunSuite + +import org.apache.spark.SparkConf +import org.apache.spark.scheduler.{SparkListenerJobStart, SparkListenerStageSubmitted, StageInfo} + +class RDDOperationGraphListenerSuite extends FunSuite { + private var jobIdCounter = 0 + private var stageIdCounter = 0 + + /** Run a job with the specified number of stages. 
*/ + private def runOneJob(numStages: Int, listener: RDDOperationGraphListener): Unit = { + assert(numStages > 0, "I will not run a job with 0 stages for you.") + val stageInfos = (0 until numStages).map { _ => + val stageInfo = new StageInfo(stageIdCounter, 0, "s", 0, Seq.empty, Seq.empty, "d") + listener.onStageSubmitted(new SparkListenerStageSubmitted(stageInfo)) + stageIdCounter += 1 + stageInfo + } + listener.onJobStart(new SparkListenerJobStart(jobIdCounter, 0, stageInfos)) + jobIdCounter += 1 + } + + test("listener cleans up metadata") { + + val conf = new SparkConf() + .set("spark.ui.retainedStages", "10") + .set("spark.ui.retainedJobs", "10") + + val listener = new RDDOperationGraphListener(conf) + assert(listener.jobIdToStageIds.isEmpty) + assert(listener.stageIdToGraph.isEmpty) + assert(listener.jobIds.isEmpty) + assert(listener.stageIds.isEmpty) + + // Run a few jobs, but not enough for clean up yet + runOneJob(1, listener) + runOneJob(2, listener) + runOneJob(3, listener) + assert(listener.jobIdToStageIds.size === 3) + assert(listener.stageIdToGraph.size === 6) + assert(listener.jobIds.size === 3) + assert(listener.stageIds.size === 6) + + // Run a few more, but this time the stages should be cleaned up, but not the jobs + runOneJob(5, listener) + runOneJob(100, listener) + assert(listener.jobIdToStageIds.size === 5) + assert(listener.stageIdToGraph.size === 9) + assert(listener.jobIds.size === 5) + assert(listener.stageIds.size === 9) + + // Run a few more, but this time both jobs and stages should be cleaned up + (1 to 100).foreach { _ => + runOneJob(1, listener) + } + assert(listener.jobIdToStageIds.size === 9) + assert(listener.stageIdToGraph.size === 9) + assert(listener.jobIds.size === 9) + assert(listener.stageIds.size === 9) + + // Ensure we clean up old jobs and stages, not arbitrary ones + assert(!listener.jobIdToStageIds.contains(0)) + assert(!listener.stageIdToGraph.contains(0)) + assert(!listener.stageIds.contains(0)) + assert(!listener.jobIds.contains(0)) + } + +} From f88ac701552a1a854247509db49d78f13515eae4 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Wed, 13 May 2015 16:28:37 -0700 Subject: [PATCH 153/320] [SPARK-7399] Spark compilation error for scala 2.11 Subsequent fix following #5966. I tried this out locally. Author: Andrew Or <andrew@databricks.com> Closes #6129 from andrewor14/211-compilation and squashes the following commits: 713868f [Andrew Or] Fix compilation issue for scala 2.11 --- .../main/scala/org/apache/spark/rdd/RDD.scala | 2 +- .../apache/spark/rdd/RDDOperationScope.scala | 4 ++-- .../spark/rdd/RDDOperationScopeSuite.scala | 20 ++++++++++--------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 02a94baf372d9..f7fa37e4cdcdc 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1524,7 +1524,7 @@ abstract class RDD[T: ClassTag]( * doCheckpoint() is called recursively on the parent RDDs. 
*/ private[spark] def doCheckpoint(): Unit = { - RDDOperationScope.withScope(sc, "checkpoint", false, true) { + RDDOperationScope.withScope(sc, "checkpoint", allowNesting = false, ignoreParent = true) { if (!doCheckpointCalled) { doCheckpointCalled = true if (checkpointData.isDefined) { diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala index 93ec606f2de7d..2725826f421f4 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala @@ -96,7 +96,7 @@ private[spark] object RDDOperationScope { sc: SparkContext, allowNesting: Boolean = false)(body: => T): T = { val callerMethodName = Thread.currentThread.getStackTrace()(3).getMethodName - withScope[T](sc, callerMethodName, allowNesting)(body) + withScope[T](sc, callerMethodName, allowNesting, ignoreParent = false)(body) } /** @@ -116,7 +116,7 @@ private[spark] object RDDOperationScope { sc: SparkContext, name: String, allowNesting: Boolean, - ignoreParent: Boolean = false)(body: => T): T = { + ignoreParent: Boolean)(body: => T): T = { // Save the old scope to restore it later val scopeKey = SparkContext.RDD_SCOPE_KEY val noOverrideKey = SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala index d75ecbf1f0b4d..db465a6a9eb55 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala @@ -61,11 +61,11 @@ class RDDOperationScopeSuite extends FunSuite with BeforeAndAfter { var rdd1: MyCoolRDD = null var rdd2: MyCoolRDD = null var rdd3: MyCoolRDD = null - RDDOperationScope.withScope(sc, "scope1", allowNesting = false) { + RDDOperationScope.withScope(sc, "scope1", allowNesting = false, ignoreParent = false) { rdd1 = new MyCoolRDD(sc) - RDDOperationScope.withScope(sc, "scope2", allowNesting = false) { + RDDOperationScope.withScope(sc, "scope2", allowNesting = false, ignoreParent = false) { rdd2 = new MyCoolRDD(sc) - RDDOperationScope.withScope(sc, "scope3", allowNesting = false) { + RDDOperationScope.withScope(sc, "scope3", allowNesting = false, ignoreParent = false) { rdd3 = new MyCoolRDD(sc) } } @@ -84,11 +84,13 @@ class RDDOperationScopeSuite extends FunSuite with BeforeAndAfter { var rdd1: MyCoolRDD = null var rdd2: MyCoolRDD = null var rdd3: MyCoolRDD = null - RDDOperationScope.withScope(sc, "scope1", allowNesting = true) { // allow nesting here + // allow nesting here + RDDOperationScope.withScope(sc, "scope1", allowNesting = true, ignoreParent = false) { rdd1 = new MyCoolRDD(sc) - RDDOperationScope.withScope(sc, "scope2", allowNesting = false) { // stop nesting here + // stop nesting here + RDDOperationScope.withScope(sc, "scope2", allowNesting = false, ignoreParent = false) { rdd2 = new MyCoolRDD(sc) - RDDOperationScope.withScope(sc, "scope3", allowNesting = false) { + RDDOperationScope.withScope(sc, "scope3", allowNesting = false, ignoreParent = false) { rdd3 = new MyCoolRDD(sc) } } @@ -107,11 +109,11 @@ class RDDOperationScopeSuite extends FunSuite with BeforeAndAfter { var rdd1: MyCoolRDD = null var rdd2: MyCoolRDD = null var rdd3: MyCoolRDD = null - RDDOperationScope.withScope(sc, "scope1", allowNesting = true) { + RDDOperationScope.withScope(sc, "scope1", allowNesting = true, ignoreParent = false) { rdd1 = new MyCoolRDD(sc) - 
RDDOperationScope.withScope(sc, "scope2", allowNesting = true) { + RDDOperationScope.withScope(sc, "scope2", allowNesting = true, ignoreParent = false) { rdd2 = new MyCoolRDD(sc) - RDDOperationScope.withScope(sc, "scope3", allowNesting = true) { + RDDOperationScope.withScope(sc, "scope3", allowNesting = true, ignoreParent = false) { rdd3 = new MyCoolRDD(sc) } } From 44403414d3e754f7b991c0bbeb4868edb4135aa2 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Wed, 13 May 2015 16:29:10 -0700 Subject: [PATCH 154/320] [SPARK-7464] DAG visualization: highlight the same RDDs on hover This is pretty useful for MLlib. <img src="https://cloud.githubusercontent.com/assets/2133137/7599650/c7d03dd8-f8b8-11e4-8c0a-0a89e786c90f.png" width="400px"/> Author: Andrew Or <andrew@databricks.com> Closes #6100 from andrewor14/dag-viz-hover and squashes the following commits: fefe2af [Andrew Or] Link tooltips for nodes that belong to the same RDD 90c6a7e [Andrew Or] Assign classes to clusters and nodes, not IDs --- .../apache/spark/ui/static/dagre-d3.min.js | 2 +- .../apache/spark/ui/static/spark-dag-viz.css | 4 +- .../apache/spark/ui/static/spark-dag-viz.js | 47 ++++++++++++++----- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js index acf2d93b718b2..c55f752620dfd 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js +++ b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js @@ -20,7 +20,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -module.exports={graphlib:require("./lib/graphlib"),dagre:require("./lib/dagre"),intersect:require("./lib/intersect"),render:require("./lib/render"),util:require("./lib/util"),version:require("./lib/version")}},{"./lib/dagre":8,"./lib/graphlib":9,"./lib/intersect":10,"./lib/render":23,"./lib/util":25,"./lib/version":26}],2:[function(require,module,exports){var util=require("./util");module.exports={"default":normal,normal:normal,vee:vee,undirected:undirected};function normal(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function vee(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 L 4 5 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function undirected(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 5 L 10 5").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}},{"./util":25}],3:[function(require,module,exports){var 
_=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util");module.exports=createClusters;function createClusters(selection,g){var clusters=g.nodes().filter(function(v){return util.isSubgraph(g,v)}),svgClusters=selection.selectAll("g.cluster").data(clusters,function(v){return v});var makeClusterIdentifier=function(v){return"cluster_"+v.replace(/^cluster/,"")};svgClusters.enter().append("g").attr("id",makeClusterIdentifier).attr("name",function(v){return g.node(v).label}).classed("cluster",true).style("opacity",0).append("rect");var sortedClusters=util.orderByRank(g,svgClusters.data());for(var i=0;i<sortedClusters.length;i++){var v=sortedClusters[i];var node=g.node(v);if(node.label){var thisGroup=selection.select("g.cluster#"+makeClusterIdentifier(v));labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),bbox=_.pick(labelDom.node().getBBox(),"width","height");node.paddingTop+=bbox.height;node.paddingTop+=util.getMaxChildPaddingTop(g,v)}}util.applyTransition(svgClusters.exit(),g).style("opacity",0).remove();util.applyTransition(svgClusters,g).style("opacity",1);util.applyTransition(svgClusters.selectAll("rect"),g).attr("width",function(v){var node=g.node(v);return node.width+node.paddingLeft+node.paddingRight}).attr("height",function(v){var node=g.node(v);return node.height+node.paddingTop+node.paddingBottom}).attr("x",function(v){var node=g.node(v);return node.x-node.width/2-node.paddingLeft}).attr("y",function(v){var node=g.node(v);return node.y-node.height/2-node.paddingTop});svgClusters.each(function(){var cluster=d3.select(this),label=cluster.select("g.label"),rect=cluster.select("rect"),bbox=label.node().getBBox(),labelW=bbox.width,labelH=bbox.height;var num=function(x){return parseFloat(x.toString().replace(/px$/,""))};var labelX=num(rect.attr("x"))+num(rect.attr("width"))-labelH/2-labelW/2;var labelY=num(rect.attr("y"))+labelH;label.attr("transform","translate("+labelX+","+labelY+")")})}},{"./label/add-label":18,"./lodash":20,"./util":25}],4:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createEdgeLabels;function createEdgeLabels(selection,g){var svgEdgeLabels=selection.selectAll("g.edgeLabel").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);svgEdgeLabels.selectAll("*").remove();svgEdgeLabels.enter().append("g").classed("edgeLabel",true).style("opacity",0);svgEdgeLabels.each(function(e){var edge=g.edge(e),label=addLabel(d3.select(this),g.edge(e),0,0).classed("label",true),bbox=label.node().getBBox();if(edge.labelId){label.attr("id",edge.labelId)}if(!_.has(edge,"width")){edge.width=bbox.width}if(!_.has(edge,"height")){edge.height=bbox.height}});util.applyTransition(svgEdgeLabels.exit(),g).style("opacity",0).remove();return svgEdgeLabels}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],5:[function(require,module,exports){"use strict";var _=require("./lodash"),intersectNode=require("./intersect/intersect-node"),util=require("./util"),d3=require("./d3");module.exports=createEdgePaths;function createEdgePaths(selection,g,arrows){var svgPaths=selection.selectAll("g.edgePath").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);enter(svgPaths,g);exit(svgPaths,g);util.applyTransition(svgPaths,g).style("opacity",1);svgPaths.each(function(e){var domEdge=d3.select(this);var 
edge=g.edge(e);edge.elem=this;if(edge.id){domEdge.attr("id",edge.id)}util.applyClass(domEdge,edge["class"],(domEdge.classed("update")?"update ":"")+"edgePath")});svgPaths.selectAll("path.path").each(function(e){var edge=g.edge(e);edge.arrowheadId=_.uniqueId("arrowhead");var domEdge=d3.select(this).attr("marker-end",function(){return"url(#"+edge.arrowheadId+")"}).style("fill","none");util.applyTransition(domEdge,g).attr("d",function(e){return calcPoints(g,e)});util.applyStyle(domEdge,edge.style)});svgPaths.selectAll("defs *").remove();svgPaths.selectAll("defs").each(function(e){var edge=g.edge(e),arrowhead=arrows[edge.arrowhead];arrowhead(d3.select(this),edge.arrowheadId,edge,"arrowhead")});return svgPaths}function calcPoints(g,e){var edge=g.edge(e),tail=g.node(e.v),head=g.node(e.w),points=edge.points.slice(1,edge.points.length-1);points.unshift(intersectNode(tail,points[0]));points.push(intersectNode(head,points[points.length-1]));return createLine(edge,points)}function createLine(edge,points){var line=d3.svg.line().x(function(d){return d.x}).y(function(d){return d.y});if(_.has(edge,"lineInterpolate")){line.interpolate(edge.lineInterpolate)}if(_.has(edge,"lineTension")){line.tension(Number(edge.lineTension))}return line(points)}function getCoords(elem){var bbox=elem.getBBox(),matrix=elem.getTransformToElement(elem.ownerSVGElement).translate(bbox.width/2,bbox.height/2);return{x:matrix.e,y:matrix.f}}function enter(svgPaths,g){var svgPathsEnter=svgPaths.enter().append("g").attr("class","edgePath").style("opacity",0);svgPathsEnter.append("path").attr("class","path").attr("d",function(e){var edge=g.edge(e),sourceElem=g.node(e.v).elem,points=_.range(edge.points.length).map(function(){return getCoords(sourceElem)});return createLine(edge,points)});svgPathsEnter.append("defs")}function exit(svgPaths,g){var svgPathExit=svgPaths.exit();util.applyTransition(svgPathExit,g).style("opacity",0).remove();util.applyTransition(svgPathExit.select("path.path"),g).attr("d",function(e){var source=g.node(e.v);if(source){var points=_.range(this.pathSegList.length).map(function(){return source});return createLine({},points)}else{return d3.select(this).attr("d")}})}},{"./d3":7,"./intersect/intersect-node":14,"./lodash":20,"./util":25}],6:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createNodes;function createNodes(selection,g,shapes){var simpleNodes=g.nodes().filter(function(v){return!util.isSubgraph(g,v)});var svgNodes=selection.selectAll("g.node").data(simpleNodes,function(v){return v}).classed("update",true);svgNodes.selectAll("*").remove();svgNodes.enter().append("g").attr("id",function(v){return"node_"+v}).attr("name",function(v){return g.node(v).label}).classed("node",true).style("opacity",0);svgNodes.each(function(v){var node=g.node(v),thisGroup=d3.select(this),labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),shape=shapes[node.shape],bbox=_.pick(labelDom.node().getBBox(),"width","height");node.elem=this;if(node.id){thisGroup.attr("id",node.id)}if(node.labelId){labelGroup.attr("id",node.labelId)}util.applyClass(thisGroup,node["class"],(thisGroup.classed("update")?"update 
":"")+"node");if(_.has(node,"width")){bbox.width=node.width}if(_.has(node,"height")){bbox.height=node.height}bbox.width+=node.paddingLeft+node.paddingRight;bbox.height+=node.paddingTop+node.paddingBottom;labelGroup.attr("transform","translate("+(node.paddingLeft-node.paddingRight)/2+","+(node.paddingTop-node.paddingBottom)/2+")");var shapeSvg=shape(d3.select(this),bbox,node);util.applyStyle(shapeSvg,node.style);var requiredWidth=0,requiredHeight=0;var nextNode=g.node(g.parent(v));while(nextNode){var tempGroup=thisGroup.append("g");var tempLabel=addLabel(tempGroup,nextNode);var tempBBox=tempLabel.node().getBBox();tempBBox.width-=50;requiredWidth=Math.max(requiredWidth,tempBBox.width);requiredHeight=Math.max(requiredHeight,tempBBox.height);tempLabel.remove();nextNode=g.node(g.parent(nextNode.label))}var shapeBBox=shapeSvg.node().getBBox();shapeBBox.width=Math.max(shapeBBox.width,requiredWidth);shapeBBox.height=Math.max(shapeBBox.height,requiredHeight);node.width=shapeBBox.width;node.height=shapeBBox.height});util.applyTransition(svgNodes.exit(),g).style("opacity",0).remove();return svgNodes}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],7:[function(require,module,exports){module.exports=window.d3},{}],8:[function(require,module,exports){var dagre;if(require){try{dagre=require("dagre")}catch(e){}}if(!dagre){dagre=window.dagre}module.exports=dagre},{dagre:27}],9:[function(require,module,exports){var graphlib;if(require){try{graphlib=require("graphlib")}catch(e){}}if(!graphlib){graphlib=window.graphlib}module.exports=graphlib},{graphlib:57}],10:[function(require,module,exports){module.exports={node:require("./intersect-node"),circle:require("./intersect-circle"),ellipse:require("./intersect-ellipse"),polygon:require("./intersect-polygon"),rect:require("./intersect-rect")}},{"./intersect-circle":11,"./intersect-ellipse":12,"./intersect-node":14,"./intersect-polygon":15,"./intersect-rect":16}],11:[function(require,module,exports){var intersectEllipse=require("./intersect-ellipse");module.exports=intersectCircle;function intersectCircle(node,rx,point){return intersectEllipse(node,rx,rx,point)}},{"./intersect-ellipse":12}],12:[function(require,module,exports){module.exports=intersectEllipse;function intersectEllipse(node,rx,ry,point){var cx=node.x;var cy=node.y;var px=cx-point.x;var py=cy-point.y;var det=Math.sqrt(rx*rx*py*py+ry*ry*px*px);var dx=Math.abs(rx*ry*px/det);if(point.x<cx){dx=-dx}var dy=Math.abs(rx*ry*py/det);if(point.y<cy){dy=-dy}return{x:cx+dx,y:cy+dy}}},{}],13:[function(require,module,exports){module.exports=intersectLine;function intersectLine(p1,p2,q1,q2){var a1,a2,b1,b2,c1,c2;var r1,r2,r3,r4;var denom,offset,num;var x,y;a1=p2.y-p1.y;b1=p1.x-p2.x;c1=p2.x*p1.y-p1.x*p2.y;r3=a1*q1.x+b1*q1.y+c1;r4=a1*q2.x+b1*q2.y+c1;if(r3!==0&&r4!==0&&sameSign(r3,r4)){return}a2=q2.y-q1.y;b2=q1.x-q2.x;c2=q2.x*q1.y-q1.x*q2.y;r1=a2*p1.x+b2*p1.yy+c2;r2=a2*p2.x+b2*p2.y+c2;if(r1!==0&&r2!==0&&sameSign(r1,r2)){return}denom=a1*b2-a2*b1;if(denom===0){return}offset=Math.abs(denom/2);num=b1*c2-b2*c1;x=num<0?(num-offset)/denom:(num+offset)/denom;num=a2*c1-a1*c2;y=num<0?(num-offset)/denom:(num+offset)/denom;return{x:x,y:y}}function sameSign(r1,r2){return r1*r2>0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var 
intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i<polyPoints.length;i++){var p1=polyPoints[i];var p2=polyPoints[i<polyPoints.length-1?i+1:0];var intersect=intersectLine(node,point,{x:left+p1.x,y:top+p1.y},{x:left+p2.x,y:top+p2.y});if(intersect){intersections.push(intersect)}}if(!intersections.length){console.log("NO INTERSECTION FOUND, RETURN NODE CENTER",node);return node}if(intersections.length>1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distp<distq?-1:distp===distq?0:1})}return intersections[0]}},{"./intersect-line":13}],16:[function(require,module,exports){module.exports=intersectRect;function intersectRect(node,point){var x=node.x;var y=node.y;var dx=point.x-x;var dy=point.y-y;var w=node.width/2;var h=node.height/2;var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;i<lines.length;i++){domNode.append("tspan").attr("xml:space","preserve").attr("dy","1em").attr("x","1").text(lines[i])}util.applyStyle(domNode,node.labelStyle);return domNode}function processEscapeSequences(text){var newText="",escaped=false,ch;for(var i=0;i<text.length;++i){ch=text[i];if(escaped){switch(ch){case"n":newText+="\n";break;default:newText+=ch}escaped=false}else if(ch==="\\"){escaped=true}else{newText+=ch}}return newText}},{"../util":25}],20:[function(require,module,exports){var lodash;if(require){try{lodash=require("lodash")}catch(e){}}if(!lodash){lodash=window._}module.exports=lodash},{lodash:77}],21:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3"),_=require("./lodash");module.exports=positionEdgeLabels;function positionEdgeLabels(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(e){var edge=g.edge(e);return 
_.has(edge,"x")?"translate("+edge.x+","+edge.y+")":""}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./lodash":20,"./util":25}],22:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3");module.exports=positionNodes;function positionNodes(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(v){var node=g.node(v);return"translate("+node.x+","+node.y+")"}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./util":25}],23:[function(require,module,exports){var _=require("./lodash"),layout=require("./dagre").layout;module.exports=render;function render(){var createNodes=require("./create-nodes"),createClusters=require("./create-clusters"),createEdgeLabels=require("./create-edge-labels"),createEdgePaths=require("./create-edge-paths"),positionNodes=require("./position-nodes"),positionEdgeLabels=require("./position-edge-labels"),shapes=require("./shapes"),arrows=require("./arrows");var fn=function(svg,g){preProcessGraph(g);var outputGroup=createOrSelectGroup(svg,"output"),clustersGroup=createOrSelectGroup(outputGroup,"clusters"),edgePathsGroup=createOrSelectGroup(outputGroup,"edgePaths"),edgeLabels=createEdgeLabels(createOrSelectGroup(outputGroup,"edgeLabels"),g),nodes=createNodes(createOrSelectGroup(outputGroup,"nodes"),g,shapes);layout(g);positionNodes(nodes,g);positionEdgeLabels(edgeLabels,g);createEdgePaths(edgePathsGroup,g,arrows);createClusters(clustersGroup,g);postProcessGraph(g)};fn.createNodes=function(value){if(!arguments.length)return createNodes;createNodes=value;return fn};fn.createClusters=function(value){if(!arguments.length)return createClusters;createClusters=value;return fn};fn.createEdgeLabels=function(value){if(!arguments.length)return createEdgeLabels;createEdgeLabels=value;return fn};fn.createEdgePaths=function(value){if(!arguments.length)return createEdgePaths;createEdgePaths=value;return fn};fn.shapes=function(value){if(!arguments.length)return shapes;shapes=value;return fn};fn.arrows=function(value){if(!arguments.length)return arrows;arrows=value;return fn};return fn}var NODE_DEFAULT_ATTRS={paddingLeft:0,paddingRight:0,paddingTop:0,paddingBottom:0,rx:0,ry:0,shape:"rect"};var EDGE_DEFAULT_ATTRS={arrowhead:"normal",lineInterpolate:"linear"};function preProcessGraph(g){g.nodes().forEach(function(v){var node=g.node(v);if(!_.has(node,"label")){node.label=v}if(_.has(node,"paddingX")){_.defaults(node,{paddingLeft:node.paddingX,paddingRight:node.paddingX})}if(_.has(node,"paddingY")){_.defaults(node,{paddingTop:node.paddingY,paddingBottom:node.paddingY})}if(_.has(node,"padding")){_.defaults(node,{paddingLeft:node.padding,paddingRight:node.padding,paddingTop:node.padding,paddingBottom:node.padding})}if(_.has(node,"paddingLeft")){_.defaults(node,{paddingLeft:node.paddingLeft})}if(_.has(node,"paddingRight")){_.defaults(node,{paddingRight:node.paddingRight})}if(_.has(node,"paddingTop")){_.defaults(node,{paddingTop:node.paddingTop})}if(_.has(node,"paddingBottom")){_.defaults(node,{paddingBottom:node.paddingBottom})}_.defaults(node,NODE_DEFAULT_ATTRS);_.each(["paddingLeft","paddingRight","paddingTop","paddingBottom"],function(k){node[k]=Number(node[k])});if(_.has(node,"width")){node._prevWidth=node.width}if(_.has(node,"height")){node._prevHeight=node.height}});g.edges().forEach(function(e){var 
edge=g.edge(e);if(!_.has(edge,"label")){edge.label=""}_.defaults(edge,EDGE_DEFAULT_ATTRS)})}function postProcessGraph(g){_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"_prevWidth")){node.width=node._prevWidth}else{delete node.width}if(_.has(node,"_prevHeight")){node.height=node._prevHeight}else{delete node.height}delete node._prevWidth;delete node._prevHeight})}function createOrSelectGroup(root,name){var selection=root.select("g."+name);if(selection.empty()){selection=root.append("g").attr("class",name)}return selection}},{"./arrows":2,"./create-clusters":3,"./create-edge-labels":4,"./create-edge-paths":5,"./create-nodes":6,"./dagre":8,"./lodash":20,"./position-edge-labels":21,"./position-nodes":22,"./shapes":24}],24:[function(require,module,exports){"use strict";var intersectRect=require("./intersect/intersect-rect"),intersectEllipse=require("./intersect/intersect-ellipse"),intersectCircle=require("./intersect/intersect-circle"),intersectPolygon=require("./intersect/intersect-polygon");module.exports={rect:rect,ellipse:ellipse,circle:circle,diamond:diamond};function rect(parent,bbox,node){var shapeSvg=parent.insert("rect",":first-child").attr("rx",node.rx).attr("ry",node.ry).attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("width",bbox.width).attr("height",bbox.height);node.intersect=function(point){return intersectRect(node,point)};return shapeSvg}function ellipse(parent,bbox,node){var rx=bbox.width/2,ry=bbox.height/2,shapeSvg=parent.insert("ellipse",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("rx",rx).attr("ry",ry);node.intersect=function(point){return intersectEllipse(node,rx,ry,point)};return shapeSvg}function circle(parent,bbox,node){var r=Math.max(bbox.width,bbox.height)/2,shapeSvg=parent.insert("circle",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("r",r);node.intersect=function(point){return intersectCircle(node,r,point)};return shapeSvg}function diamond(parent,bbox,node){var w=bbox.width*Math.SQRT2/2,h=bbox.height*Math.SQRT2/2,points=[{x:0,y:-h},{x:-w,y:0},{x:0,y:h},{x:w,y:0}],shapeSvg=parent.insert("polygon",":first-child").attr("points",points.map(function(p){return p.x+","+p.y}).join(" "));node.intersect=function(p){return intersectPolygon(node,points,p)};return shapeSvg}},{"./intersect/intersect-circle":11,"./intersect/intersect-ellipse":12,"./intersect/intersect-polygon":15,"./intersect/intersect-rect":16}],25:[function(require,module,exports){var _=require("./lodash");module.exports={isSubgraph:isSubgraph,getMaxChildPaddingTop:getMaxChildPaddingTop,orderByRank:orderByRank,edgeToId:edgeToId,applyStyle:applyStyle,applyClass:applyClass,applyTransition:applyTransition};function isSubgraph(g,v){return!!g.children(v).length}function getMaxChildPaddingTop(g,v){var maxPadding=0;var children=g.children(v);for(var i=0;i<children.length;i++){var child=g.node(children[i]);if(child.paddingTop&&child.paddingTop>maxPadding){maxPadding=child.paddingTop}}return maxPadding}function getRank(g,v){var maxRank=0;var children=g.children(v);for(var i=0;i<children.length;i++){var thisRank=getRank(g,children[i])+1;if(thisRank>maxRank){maxRank=thisRank}}return maxRank}function orderByRank(g,nodes){return nodes.sort(function(x,y){return getRank(g,x)-getRank(g,y)})}function edgeToId(e){return escapeId(e.v)+":"+escapeId(e.w)+":"+escapeId(e.name)}var ID_DELIM=/:/g;function escapeId(str){return str?String(str).replace(ID_DELIM,"\\:"):""}function applyStyle(dom,styleFn){if(styleFn){dom.attr("style",styleFn)}}function 
applyClass(dom,classFn,otherClasses){if(classFn){dom.attr("class",classFn).attr("class",otherClasses+" "+dom.attr("class"))}}function applyTransition(selection,g){var graph=g.graph();if(_.isPlainObject(graph)){var transition=graph.transition;if(_.isFunction(transition)){return transition(selection)}}return selection}},{"./lodash":20}],26:[function(require,module,exports){module.exports="0.4.4-pre"},{}],27:[function(require,module,exports){module.exports={graphlib:require("./lib/graphlib"),layout:require("./lib/layout"),debug:require("./lib/debug"),util:{time:require("./lib/util").time,notime:require("./lib/util").notime},version:require("./lib/version")}},{"./lib/debug":32,"./lib/graphlib":33,"./lib/layout":35,"./lib/util":55,"./lib/version":56}],28:[function(require,module,exports){"use strict";var _=require("./lodash"),greedyFAS=require("./greedy-fas");module.exports={run:run,undo:undo};function run(g){var fas=g.graph().acyclicer==="greedy"?greedyFAS(g,weightFn(g)):dfsFAS(g);_.each(fas,function(e){var label=g.edge(e);g.removeEdge(e);label.forwardName=e.name;label.reversed=true;g.setEdge(e.w,e.v,label,_.uniqueId("rev"))});function weightFn(g){return function(e){return g.edge(e).weight}}}function dfsFAS(g){var fas=[],stack={},visited={};function dfs(v){if(_.has(visited,v)){return}visited[v]=true;stack[v]=true;_.each(g.outEdges(v),function(e){if(_.has(stack,e.w)){fas.push(e)}else{dfs(e.w)}});delete stack[v]}_.each(g.nodes(),dfs);return fas}function undo(g){_.each(g.edges(),function(e){var label=g.edge(e);if(label.reversed){g.removeEdge(e);var forwardName=label.forwardName;delete label.reversed;delete label.forwardName;g.setEdge(e.w,e.v,label,forwardName)}})}},{"./greedy-fas":34,"./lodash":36}],29:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports=addBorderSegments;function addBorderSegments(g){function dfs(v){var children=g.children(v),node=g.node(v);if(children.length){_.each(children,dfs)}if(_.has(node,"minRank")){node.borderLeft=[];node.borderRight=[];for(var rank=node.minRank,maxRank=node.maxRank+1;rank<maxRank;++rank){addBorderNode(g,"borderLeft","_bl",v,node,rank);addBorderNode(g,"borderRight","_br",v,node,rank)}}}_.each(g.children(),dfs)}function addBorderNode(g,prop,prefix,sg,sgNode,rank){var label={width:0,height:0,rank:rank},prev=sgNode[prop][rank-1],curr=util.addDummyNode(g,"border",label,prefix);sgNode[prop][rank]=curr;g.setParent(curr,sg);if(prev){g.setEdge(prev,curr,{weight:1})}}},{"./lodash":36,"./util":55}],30:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports={adjust:adjust,undo:undo};function adjust(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="lr"||rankDir==="rl"){swapWidthHeight(g)}}function undo(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="bt"||rankDir==="rl"){reverseY(g)}if(rankDir==="lr"||rankDir==="rl"){swapXY(g);swapWidthHeight(g)}}function swapWidthHeight(g){_.each(g.nodes(),function(v){swapWidthHeightOne(g.node(v))});_.each(g.edges(),function(e){swapWidthHeightOne(g.edge(e))})}function swapWidthHeightOne(attrs){var w=attrs.width;attrs.width=attrs.height;attrs.height=w}function reverseY(g){_.each(g.nodes(),function(v){reverseYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,reverseYOne);if(_.has(edge,"y")){reverseYOne(edge)}})}function reverseYOne(attrs){attrs.y=-attrs.y}function swapXY(g){_.each(g.nodes(),function(v){swapXYOne(g.node(v))});_.each(g.edges(),function(e){var 
edge=g.edge(e);_.each(edge.points,swapXYOne);if(_.has(edge,"x")){swapXYOne(edge)}})}function swapXYOne(attrs){var x=attrs.x;attrs.x=attrs.y;attrs.y=x}},{"./lodash":36}],31:[function(require,module,exports){module.exports=List;function List(){var sentinel={};sentinel._next=sentinel._prev=sentinel;this._sentinel=sentinel}List.prototype.dequeue=function(){var sentinel=this._sentinel,entry=sentinel._prev;if(entry!==sentinel){unlink(entry);return entry}};List.prototype.enqueue=function(entry){var sentinel=this._sentinel;if(entry._prev&&entry._next){unlink(entry)}entry._next=sentinel._next;sentinel._next._prev=entry;sentinel._next=entry;entry._prev=sentinel};List.prototype.toString=function(){var strs=[],sentinel=this._sentinel,curr=sentinel._prev;while(curr!==sentinel){strs.push(JSON.stringify(curr,filterOutLinks));curr=curr._prev}return"["+strs.join(", ")+"]"};function unlink(entry){entry._prev._next=entry._next;entry._next._prev=entry._prev;delete entry._next;delete entry._prev}function filterOutLinks(k,v){if(k!=="_next"&&k!=="_prev"){return v}}},{}],32:[function(require,module,exports){var _=require("./lodash"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports={debugOrdering:debugOrdering};function debugOrdering(g){var layerMatrix=util.buildLayerMatrix(g);var h=new Graph({compound:true,multigraph:true}).setGraph({});_.each(g.nodes(),function(v){h.setNode(v,{label:v});h.setParent(v,"layer"+g.node(v).rank)});_.each(g.edges(),function(e){h.setEdge(e.v,e.w,{},e.name)});_.each(layerMatrix,function(layer,i){var layerV="layer"+i;h.setNode(layerV,{rank:"same"});_.reduce(layer,function(u,v){h.setEdge(u,v,{style:"invis"});return v})});return h}},{"./graphlib":33,"./lodash":36,"./util":55}],33:[function(require,module,exports){module.exports=require(9)},{"/Users/andrew/Documents/dev/dagre-d3/lib/graphlib.js":9,graphlib:57}],34:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graphlib").Graph,List=require("./data/list");module.exports=greedyFAS;var DEFAULT_WEIGHT_FN=_.constant(1);function greedyFAS(g,weightFn){if(g.nodeCount()<=1){return[]}var state=buildState(g,weightFn||DEFAULT_WEIGHT_FN);var results=doGreedyFAS(state.graph,state.buckets,state.zeroIdx);return _.flatten(_.map(results,function(e){return g.outEdges(e.v,e.w)}),true)}function doGreedyFAS(g,buckets,zeroIdx){var results=[],sources=buckets[buckets.length-1],sinks=buckets[0];var entry;while(g.nodeCount()){while(entry=sinks.dequeue()){removeNode(g,buckets,zeroIdx,entry)}while(entry=sources.dequeue()){removeNode(g,buckets,zeroIdx,entry)}if(g.nodeCount()){for(var i=buckets.length-2;i>0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var 
prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){ +module.exports={graphlib:require("./lib/graphlib"),dagre:require("./lib/dagre"),intersect:require("./lib/intersect"),render:require("./lib/render"),util:require("./lib/util"),version:require("./lib/version")}},{"./lib/dagre":8,"./lib/graphlib":9,"./lib/intersect":10,"./lib/render":23,"./lib/util":25,"./lib/version":26}],2:[function(require,module,exports){var util=require("./util");module.exports={"default":normal,normal:normal,vee:vee,undirected:undirected};function normal(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function vee(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var 
path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 L 4 5 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function undirected(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 5 L 10 5").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}},{"./util":25}],3:[function(require,module,exports){var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util");module.exports=createClusters;function createClusters(selection,g){var clusters=g.nodes().filter(function(v){return util.isSubgraph(g,v)}),svgClusters=selection.selectAll("g.cluster").data(clusters,function(v){return v});var makeClusterIdentifier=function(v){return"cluster_"+v.replace(/^cluster/,"")};svgClusters.enter().append("g").attr("class",makeClusterIdentifier).attr("name",function(v){return g.node(v).label}).classed("cluster",true).style("opacity",0).append("rect");var sortedClusters=util.orderByRank(g,svgClusters.data());for(var i=0;i<sortedClusters.length;i++){var v=sortedClusters[i];var node=g.node(v);if(node.label){var thisGroup=selection.select("g.cluster."+makeClusterIdentifier(v));labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),bbox=_.pick(labelDom.node().getBBox(),"width","height");node.paddingTop+=bbox.height;node.paddingTop+=util.getMaxChildPaddingTop(g,v)}}util.applyTransition(svgClusters.exit(),g).style("opacity",0).remove();util.applyTransition(svgClusters,g).style("opacity",1);util.applyTransition(svgClusters.selectAll("rect"),g).attr("width",function(v){var node=g.node(v);return node.width+node.paddingLeft+node.paddingRight}).attr("height",function(v){var node=g.node(v);return node.height+node.paddingTop+node.paddingBottom}).attr("x",function(v){var node=g.node(v);return node.x-node.width/2-node.paddingLeft}).attr("y",function(v){var node=g.node(v);return node.y-node.height/2-node.paddingTop});svgClusters.each(function(){var cluster=d3.select(this),label=cluster.select("g.label"),rect=cluster.select("rect"),bbox=label.node().getBBox(),labelW=bbox.width,labelH=bbox.height;var num=function(x){return parseFloat(x.toString().replace(/px$/,""))};var labelX=num(rect.attr("x"))+num(rect.attr("width"))-labelH/2-labelW/2;var labelY=num(rect.attr("y"))+labelH;label.attr("transform","translate("+labelX+","+labelY+")")})}},{"./label/add-label":18,"./lodash":20,"./util":25}],4:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createEdgeLabels;function createEdgeLabels(selection,g){var svgEdgeLabels=selection.selectAll("g.edgeLabel").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);svgEdgeLabels.selectAll("*").remove();svgEdgeLabels.enter().append("g").classed("edgeLabel",true).style("opacity",0);svgEdgeLabels.each(function(e){var edge=g.edge(e),label=addLabel(d3.select(this),g.edge(e),0,0).classed("label",true),bbox=label.node().getBBox();if(edge.labelId){label.attr("id",edge.labelId)}if(!_.has(edge,"width")){edge.width=bbox.width}if(!_.has(edge,"height")){edge.height=bbox.height}});util.applyTransition(svgEdgeLabels.exit(),g).style("opacity",0).remove();return 
svgEdgeLabels}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],5:[function(require,module,exports){"use strict";var _=require("./lodash"),intersectNode=require("./intersect/intersect-node"),util=require("./util"),d3=require("./d3");module.exports=createEdgePaths;function createEdgePaths(selection,g,arrows){var svgPaths=selection.selectAll("g.edgePath").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);enter(svgPaths,g);exit(svgPaths,g);util.applyTransition(svgPaths,g).style("opacity",1);svgPaths.each(function(e){var domEdge=d3.select(this);var edge=g.edge(e);edge.elem=this;if(edge.id){domEdge.attr("id",edge.id)}util.applyClass(domEdge,edge["class"],(domEdge.classed("update")?"update ":"")+"edgePath")});svgPaths.selectAll("path.path").each(function(e){var edge=g.edge(e);edge.arrowheadId=_.uniqueId("arrowhead");var domEdge=d3.select(this).attr("marker-end",function(){return"url(#"+edge.arrowheadId+")"}).style("fill","none");util.applyTransition(domEdge,g).attr("d",function(e){return calcPoints(g,e)});util.applyStyle(domEdge,edge.style)});svgPaths.selectAll("defs *").remove();svgPaths.selectAll("defs").each(function(e){var edge=g.edge(e),arrowhead=arrows[edge.arrowhead];arrowhead(d3.select(this),edge.arrowheadId,edge,"arrowhead")});return svgPaths}function calcPoints(g,e){var edge=g.edge(e),tail=g.node(e.v),head=g.node(e.w),points=edge.points.slice(1,edge.points.length-1);points.unshift(intersectNode(tail,points[0]));points.push(intersectNode(head,points[points.length-1]));return createLine(edge,points)}function createLine(edge,points){var line=d3.svg.line().x(function(d){return d.x}).y(function(d){return d.y});if(_.has(edge,"lineInterpolate")){line.interpolate(edge.lineInterpolate)}if(_.has(edge,"lineTension")){line.tension(Number(edge.lineTension))}return line(points)}function getCoords(elem){var bbox=elem.getBBox(),matrix=elem.getTransformToElement(elem.ownerSVGElement).translate(bbox.width/2,bbox.height/2);return{x:matrix.e,y:matrix.f}}function enter(svgPaths,g){var svgPathsEnter=svgPaths.enter().append("g").attr("class","edgePath").style("opacity",0);svgPathsEnter.append("path").attr("class","path").attr("d",function(e){var edge=g.edge(e),sourceElem=g.node(e.v).elem,points=_.range(edge.points.length).map(function(){return getCoords(sourceElem)});return createLine(edge,points)});svgPathsEnter.append("defs")}function exit(svgPaths,g){var svgPathExit=svgPaths.exit();util.applyTransition(svgPathExit,g).style("opacity",0).remove();util.applyTransition(svgPathExit.select("path.path"),g).attr("d",function(e){var source=g.node(e.v);if(source){var points=_.range(this.pathSegList.length).map(function(){return source});return createLine({},points)}else{return d3.select(this).attr("d")}})}},{"./d3":7,"./intersect/intersect-node":14,"./lodash":20,"./util":25}],6:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createNodes;function createNodes(selection,g,shapes){var simpleNodes=g.nodes().filter(function(v){return!util.isSubgraph(g,v)});var svgNodes=selection.selectAll("g.node").data(simpleNodes,function(v){return v}).classed("update",true);svgNodes.selectAll("*").remove();svgNodes.enter().append("g").attr("class",function(v){return"node_"+v}).attr("name",function(v){return g.node(v).label}).classed("node",true).style("opacity",0);svgNodes.each(function(v){var 
node=g.node(v),thisGroup=d3.select(this),labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),shape=shapes[node.shape],bbox=_.pick(labelDom.node().getBBox(),"width","height");node.elem=this;if(node.id){thisGroup.attr("id",node.id)}if(node.labelId){labelGroup.attr("id",node.labelId)}util.applyClass(thisGroup,node["class"],(thisGroup.classed("update")?"update ":"")+"node");if(_.has(node,"width")){bbox.width=node.width}if(_.has(node,"height")){bbox.height=node.height}bbox.width+=node.paddingLeft+node.paddingRight;bbox.height+=node.paddingTop+node.paddingBottom;labelGroup.attr("transform","translate("+(node.paddingLeft-node.paddingRight)/2+","+(node.paddingTop-node.paddingBottom)/2+")");var shapeSvg=shape(d3.select(this),bbox,node);util.applyStyle(shapeSvg,node.style);var requiredWidth=0,requiredHeight=0;var nextNode=g.node(g.parent(v));while(nextNode){var tempGroup=thisGroup.append("g");var tempLabel=addLabel(tempGroup,nextNode);var tempBBox=tempLabel.node().getBBox();tempBBox.width-=50;requiredWidth=Math.max(requiredWidth,tempBBox.width);requiredHeight=Math.max(requiredHeight,tempBBox.height);tempLabel.remove();nextNode=g.node(g.parent(nextNode.label))}var shapeBBox=shapeSvg.node().getBBox();shapeBBox.width=Math.max(shapeBBox.width,requiredWidth);shapeBBox.height=Math.max(shapeBBox.height,requiredHeight);node.width=shapeBBox.width;node.height=shapeBBox.height});util.applyTransition(svgNodes.exit(),g).style("opacity",0).remove();return svgNodes}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],7:[function(require,module,exports){module.exports=window.d3},{}],8:[function(require,module,exports){var dagre;if(require){try{dagre=require("dagre")}catch(e){}}if(!dagre){dagre=window.dagre}module.exports=dagre},{dagre:27}],9:[function(require,module,exports){var graphlib;if(require){try{graphlib=require("graphlib")}catch(e){}}if(!graphlib){graphlib=window.graphlib}module.exports=graphlib},{graphlib:57}],10:[function(require,module,exports){module.exports={node:require("./intersect-node"),circle:require("./intersect-circle"),ellipse:require("./intersect-ellipse"),polygon:require("./intersect-polygon"),rect:require("./intersect-rect")}},{"./intersect-circle":11,"./intersect-ellipse":12,"./intersect-node":14,"./intersect-polygon":15,"./intersect-rect":16}],11:[function(require,module,exports){var intersectEllipse=require("./intersect-ellipse");module.exports=intersectCircle;function intersectCircle(node,rx,point){return intersectEllipse(node,rx,rx,point)}},{"./intersect-ellipse":12}],12:[function(require,module,exports){module.exports=intersectEllipse;function intersectEllipse(node,rx,ry,point){var cx=node.x;var cy=node.y;var px=cx-point.x;var py=cy-point.y;var det=Math.sqrt(rx*rx*py*py+ry*ry*px*px);var dx=Math.abs(rx*ry*px/det);if(point.x<cx){dx=-dx}var dy=Math.abs(rx*ry*py/det);if(point.y<cy){dy=-dy}return{x:cx+dx,y:cy+dy}}},{}],13:[function(require,module,exports){module.exports=intersectLine;function intersectLine(p1,p2,q1,q2){var a1,a2,b1,b2,c1,c2;var r1,r2,r3,r4;var denom,offset,num;var 
x,y;a1=p2.y-p1.y;b1=p1.x-p2.x;c1=p2.x*p1.y-p1.x*p2.y;r3=a1*q1.x+b1*q1.y+c1;r4=a1*q2.x+b1*q2.y+c1;if(r3!==0&&r4!==0&&sameSign(r3,r4)){return}a2=q2.y-q1.y;b2=q1.x-q2.x;c2=q2.x*q1.y-q1.x*q2.y;r1=a2*p1.x+b2*p1.yy+c2;r2=a2*p2.x+b2*p2.y+c2;if(r1!==0&&r2!==0&&sameSign(r1,r2)){return}denom=a1*b2-a2*b1;if(denom===0){return}offset=Math.abs(denom/2);num=b1*c2-b2*c1;x=num<0?(num-offset)/denom:(num+offset)/denom;num=a2*c1-a1*c2;y=num<0?(num-offset)/denom:(num+offset)/denom;return{x:x,y:y}}function sameSign(r1,r2){return r1*r2>0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i<polyPoints.length;i++){var p1=polyPoints[i];var p2=polyPoints[i<polyPoints.length-1?i+1:0];var intersect=intersectLine(node,point,{x:left+p1.x,y:top+p1.y},{x:left+p2.x,y:top+p2.y});if(intersect){intersections.push(intersect)}}if(!intersections.length){console.log("NO INTERSECTION FOUND, RETURN NODE CENTER",node);return node}if(intersections.length>1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distp<distq?-1:distp===distq?0:1})}return intersections[0]}},{"./intersect-line":13}],16:[function(require,module,exports){module.exports=intersectRect;function intersectRect(node,point){var x=node.x;var y=node.y;var dx=point.x-x;var dy=point.y-y;var w=node.width/2;var h=node.height/2;var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var 
i=0;i<lines.length;i++){domNode.append("tspan").attr("xml:space","preserve").attr("dy","1em").attr("x","1").text(lines[i])}util.applyStyle(domNode,node.labelStyle);return domNode}function processEscapeSequences(text){var newText="",escaped=false,ch;for(var i=0;i<text.length;++i){ch=text[i];if(escaped){switch(ch){case"n":newText+="\n";break;default:newText+=ch}escaped=false}else if(ch==="\\"){escaped=true}else{newText+=ch}}return newText}},{"../util":25}],20:[function(require,module,exports){var lodash;if(require){try{lodash=require("lodash")}catch(e){}}if(!lodash){lodash=window._}module.exports=lodash},{lodash:77}],21:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3"),_=require("./lodash");module.exports=positionEdgeLabels;function positionEdgeLabels(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(e){var edge=g.edge(e);return _.has(edge,"x")?"translate("+edge.x+","+edge.y+")":""}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./lodash":20,"./util":25}],22:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3");module.exports=positionNodes;function positionNodes(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(v){var node=g.node(v);return"translate("+node.x+","+node.y+")"}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./util":25}],23:[function(require,module,exports){var _=require("./lodash"),layout=require("./dagre").layout;module.exports=render;function render(){var createNodes=require("./create-nodes"),createClusters=require("./create-clusters"),createEdgeLabels=require("./create-edge-labels"),createEdgePaths=require("./create-edge-paths"),positionNodes=require("./position-nodes"),positionEdgeLabels=require("./position-edge-labels"),shapes=require("./shapes"),arrows=require("./arrows");var fn=function(svg,g){preProcessGraph(g);var outputGroup=createOrSelectGroup(svg,"output"),clustersGroup=createOrSelectGroup(outputGroup,"clusters"),edgePathsGroup=createOrSelectGroup(outputGroup,"edgePaths"),edgeLabels=createEdgeLabels(createOrSelectGroup(outputGroup,"edgeLabels"),g),nodes=createNodes(createOrSelectGroup(outputGroup,"nodes"),g,shapes);layout(g);positionNodes(nodes,g);positionEdgeLabels(edgeLabels,g);createEdgePaths(edgePathsGroup,g,arrows);createClusters(clustersGroup,g);postProcessGraph(g)};fn.createNodes=function(value){if(!arguments.length)return createNodes;createNodes=value;return fn};fn.createClusters=function(value){if(!arguments.length)return createClusters;createClusters=value;return fn};fn.createEdgeLabels=function(value){if(!arguments.length)return createEdgeLabels;createEdgeLabels=value;return fn};fn.createEdgePaths=function(value){if(!arguments.length)return createEdgePaths;createEdgePaths=value;return fn};fn.shapes=function(value){if(!arguments.length)return shapes;shapes=value;return fn};fn.arrows=function(value){if(!arguments.length)return arrows;arrows=value;return fn};return fn}var NODE_DEFAULT_ATTRS={paddingLeft:0,paddingRight:0,paddingTop:0,paddingBottom:0,rx:0,ry:0,shape:"rect"};var EDGE_DEFAULT_ATTRS={arrowhead:"normal",lineInterpolate:"linear"};function preProcessGraph(g){g.nodes().forEach(function(v){var 
node=g.node(v);if(!_.has(node,"label")){node.label=v}if(_.has(node,"paddingX")){_.defaults(node,{paddingLeft:node.paddingX,paddingRight:node.paddingX})}if(_.has(node,"paddingY")){_.defaults(node,{paddingTop:node.paddingY,paddingBottom:node.paddingY})}if(_.has(node,"padding")){_.defaults(node,{paddingLeft:node.padding,paddingRight:node.padding,paddingTop:node.padding,paddingBottom:node.padding})}if(_.has(node,"paddingLeft")){_.defaults(node,{paddingLeft:node.paddingLeft})}if(_.has(node,"paddingRight")){_.defaults(node,{paddingRight:node.paddingRight})}if(_.has(node,"paddingTop")){_.defaults(node,{paddingTop:node.paddingTop})}if(_.has(node,"paddingBottom")){_.defaults(node,{paddingBottom:node.paddingBottom})}_.defaults(node,NODE_DEFAULT_ATTRS);_.each(["paddingLeft","paddingRight","paddingTop","paddingBottom"],function(k){node[k]=Number(node[k])});if(_.has(node,"width")){node._prevWidth=node.width}if(_.has(node,"height")){node._prevHeight=node.height}});g.edges().forEach(function(e){var edge=g.edge(e);if(!_.has(edge,"label")){edge.label=""}_.defaults(edge,EDGE_DEFAULT_ATTRS)})}function postProcessGraph(g){_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"_prevWidth")){node.width=node._prevWidth}else{delete node.width}if(_.has(node,"_prevHeight")){node.height=node._prevHeight}else{delete node.height}delete node._prevWidth;delete node._prevHeight})}function createOrSelectGroup(root,name){var selection=root.select("g."+name);if(selection.empty()){selection=root.append("g").attr("class",name)}return selection}},{"./arrows":2,"./create-clusters":3,"./create-edge-labels":4,"./create-edge-paths":5,"./create-nodes":6,"./dagre":8,"./lodash":20,"./position-edge-labels":21,"./position-nodes":22,"./shapes":24}],24:[function(require,module,exports){"use strict";var intersectRect=require("./intersect/intersect-rect"),intersectEllipse=require("./intersect/intersect-ellipse"),intersectCircle=require("./intersect/intersect-circle"),intersectPolygon=require("./intersect/intersect-polygon");module.exports={rect:rect,ellipse:ellipse,circle:circle,diamond:diamond};function rect(parent,bbox,node){var shapeSvg=parent.insert("rect",":first-child").attr("rx",node.rx).attr("ry",node.ry).attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("width",bbox.width).attr("height",bbox.height);node.intersect=function(point){return intersectRect(node,point)};return shapeSvg}function ellipse(parent,bbox,node){var rx=bbox.width/2,ry=bbox.height/2,shapeSvg=parent.insert("ellipse",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("rx",rx).attr("ry",ry);node.intersect=function(point){return intersectEllipse(node,rx,ry,point)};return shapeSvg}function circle(parent,bbox,node){var r=Math.max(bbox.width,bbox.height)/2,shapeSvg=parent.insert("circle",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("r",r);node.intersect=function(point){return intersectCircle(node,r,point)};return shapeSvg}function diamond(parent,bbox,node){var w=bbox.width*Math.SQRT2/2,h=bbox.height*Math.SQRT2/2,points=[{x:0,y:-h},{x:-w,y:0},{x:0,y:h},{x:w,y:0}],shapeSvg=parent.insert("polygon",":first-child").attr("points",points.map(function(p){return p.x+","+p.y}).join(" "));node.intersect=function(p){return intersectPolygon(node,points,p)};return shapeSvg}},{"./intersect/intersect-circle":11,"./intersect/intersect-ellipse":12,"./intersect/intersect-polygon":15,"./intersect/intersect-rect":16}],25:[function(require,module,exports){var 
_=require("./lodash");module.exports={isSubgraph:isSubgraph,getMaxChildPaddingTop:getMaxChildPaddingTop,orderByRank:orderByRank,edgeToId:edgeToId,applyStyle:applyStyle,applyClass:applyClass,applyTransition:applyTransition};function isSubgraph(g,v){return!!g.children(v).length}function getMaxChildPaddingTop(g,v){var maxPadding=0;var children=g.children(v);for(var i=0;i<children.length;i++){var child=g.node(children[i]);if(child.paddingTop&&child.paddingTop>maxPadding){maxPadding=child.paddingTop}}return maxPadding}function getRank(g,v){var maxRank=0;var children=g.children(v);for(var i=0;i<children.length;i++){var thisRank=getRank(g,children[i])+1;if(thisRank>maxRank){maxRank=thisRank}}return maxRank}function orderByRank(g,nodes){return nodes.sort(function(x,y){return getRank(g,x)-getRank(g,y)})}function edgeToId(e){return escapeId(e.v)+":"+escapeId(e.w)+":"+escapeId(e.name)}var ID_DELIM=/:/g;function escapeId(str){return str?String(str).replace(ID_DELIM,"\\:"):""}function applyStyle(dom,styleFn){if(styleFn){dom.attr("style",styleFn)}}function applyClass(dom,classFn,otherClasses){if(classFn){dom.attr("class",classFn).attr("class",otherClasses+" "+dom.attr("class"))}}function applyTransition(selection,g){var graph=g.graph();if(_.isPlainObject(graph)){var transition=graph.transition;if(_.isFunction(transition)){return transition(selection)}}return selection}},{"./lodash":20}],26:[function(require,module,exports){module.exports="0.4.4-pre"},{}],27:[function(require,module,exports){module.exports={graphlib:require("./lib/graphlib"),layout:require("./lib/layout"),debug:require("./lib/debug"),util:{time:require("./lib/util").time,notime:require("./lib/util").notime},version:require("./lib/version")}},{"./lib/debug":32,"./lib/graphlib":33,"./lib/layout":35,"./lib/util":55,"./lib/version":56}],28:[function(require,module,exports){"use strict";var _=require("./lodash"),greedyFAS=require("./greedy-fas");module.exports={run:run,undo:undo};function run(g){var fas=g.graph().acyclicer==="greedy"?greedyFAS(g,weightFn(g)):dfsFAS(g);_.each(fas,function(e){var label=g.edge(e);g.removeEdge(e);label.forwardName=e.name;label.reversed=true;g.setEdge(e.w,e.v,label,_.uniqueId("rev"))});function weightFn(g){return function(e){return g.edge(e).weight}}}function dfsFAS(g){var fas=[],stack={},visited={};function dfs(v){if(_.has(visited,v)){return}visited[v]=true;stack[v]=true;_.each(g.outEdges(v),function(e){if(_.has(stack,e.w)){fas.push(e)}else{dfs(e.w)}});delete stack[v]}_.each(g.nodes(),dfs);return fas}function undo(g){_.each(g.edges(),function(e){var label=g.edge(e);if(label.reversed){g.removeEdge(e);var forwardName=label.forwardName;delete label.reversed;delete label.forwardName;g.setEdge(e.w,e.v,label,forwardName)}})}},{"./greedy-fas":34,"./lodash":36}],29:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports=addBorderSegments;function addBorderSegments(g){function dfs(v){var children=g.children(v),node=g.node(v);if(children.length){_.each(children,dfs)}if(_.has(node,"minRank")){node.borderLeft=[];node.borderRight=[];for(var rank=node.minRank,maxRank=node.maxRank+1;rank<maxRank;++rank){addBorderNode(g,"borderLeft","_bl",v,node,rank);addBorderNode(g,"borderRight","_br",v,node,rank)}}}_.each(g.children(),dfs)}function addBorderNode(g,prop,prefix,sg,sgNode,rank){var 
label={width:0,height:0,rank:rank},prev=sgNode[prop][rank-1],curr=util.addDummyNode(g,"border",label,prefix);sgNode[prop][rank]=curr;g.setParent(curr,sg);if(prev){g.setEdge(prev,curr,{weight:1})}}},{"./lodash":36,"./util":55}],30:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports={adjust:adjust,undo:undo};function adjust(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="lr"||rankDir==="rl"){swapWidthHeight(g)}}function undo(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="bt"||rankDir==="rl"){reverseY(g)}if(rankDir==="lr"||rankDir==="rl"){swapXY(g);swapWidthHeight(g)}}function swapWidthHeight(g){_.each(g.nodes(),function(v){swapWidthHeightOne(g.node(v))});_.each(g.edges(),function(e){swapWidthHeightOne(g.edge(e))})}function swapWidthHeightOne(attrs){var w=attrs.width;attrs.width=attrs.height;attrs.height=w}function reverseY(g){_.each(g.nodes(),function(v){reverseYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,reverseYOne);if(_.has(edge,"y")){reverseYOne(edge)}})}function reverseYOne(attrs){attrs.y=-attrs.y}function swapXY(g){_.each(g.nodes(),function(v){swapXYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,swapXYOne);if(_.has(edge,"x")){swapXYOne(edge)}})}function swapXYOne(attrs){var x=attrs.x;attrs.x=attrs.y;attrs.y=x}},{"./lodash":36}],31:[function(require,module,exports){module.exports=List;function List(){var sentinel={};sentinel._next=sentinel._prev=sentinel;this._sentinel=sentinel}List.prototype.dequeue=function(){var sentinel=this._sentinel,entry=sentinel._prev;if(entry!==sentinel){unlink(entry);return entry}};List.prototype.enqueue=function(entry){var sentinel=this._sentinel;if(entry._prev&&entry._next){unlink(entry)}entry._next=sentinel._next;sentinel._next._prev=entry;sentinel._next=entry;entry._prev=sentinel};List.prototype.toString=function(){var strs=[],sentinel=this._sentinel,curr=sentinel._prev;while(curr!==sentinel){strs.push(JSON.stringify(curr,filterOutLinks));curr=curr._prev}return"["+strs.join(", ")+"]"};function unlink(entry){entry._prev._next=entry._next;entry._next._prev=entry._prev;delete entry._next;delete entry._prev}function filterOutLinks(k,v){if(k!=="_next"&&k!=="_prev"){return v}}},{}],32:[function(require,module,exports){var _=require("./lodash"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports={debugOrdering:debugOrdering};function debugOrdering(g){var layerMatrix=util.buildLayerMatrix(g);var h=new Graph({compound:true,multigraph:true}).setGraph({});_.each(g.nodes(),function(v){h.setNode(v,{label:v});h.setParent(v,"layer"+g.node(v).rank)});_.each(g.edges(),function(e){h.setEdge(e.v,e.w,{},e.name)});_.each(layerMatrix,function(layer,i){var layerV="layer"+i;h.setNode(layerV,{rank:"same"});_.reduce(layer,function(u,v){h.setEdge(u,v,{style:"invis"});return v})});return h}},{"./graphlib":33,"./lodash":36,"./util":55}],33:[function(require,module,exports){module.exports=require(9)},{"/Users/andrew/Documents/dev/dagre-d3/lib/graphlib.js":9,graphlib:57}],34:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graphlib").Graph,List=require("./data/list");module.exports=greedyFAS;var DEFAULT_WEIGHT_FN=_.constant(1);function greedyFAS(g,weightFn){if(g.nodeCount()<=1){return[]}var state=buildState(g,weightFn||DEFAULT_WEIGHT_FN);var results=doGreedyFAS(state.graph,state.buckets,state.zeroIdx);return _.flatten(_.map(results,function(e){return g.outEdges(e.v,e.w)}),true)}function 
doGreedyFAS(g,buckets,zeroIdx){var results=[],sources=buckets[buckets.length-1],sinks=buckets[0];var entry;while(g.nodeCount()){while(entry=sinks.dequeue()){removeNode(g,buckets,zeroIdx,entry)}while(entry=sources.dequeue()){removeNode(g,buckets,zeroIdx,entry)}if(g.nodeCount()){for(var i=buckets.length-2;i>0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){ parentDummyChains(g)});time(" 
addBorderSegments",function(){addBorderSegments(g)});time(" order",function(){order(g)});time(" insertSelfEdges",function(){insertSelfEdges(g)});time(" adjustCoordinateSystem",function(){coordinateSystem.adjust(g)});time(" position",function(){position(g)});time(" positionSelfEdges",function(){positionSelfEdges(g)});time(" removeBorderNodes",function(){removeBorderNodes(g)});time(" normalize.undo",function(){normalize.undo(g)});time(" fixupEdgeLabelCoords",function(){fixupEdgeLabelCoords(g)});time(" undoCoordinateSystem",function(){coordinateSystem.undo(g)});time(" translateGraph",function(){translateGraph(g)});time(" assignNodeIntersects",function(){assignNodeIntersects(g)});time(" reversePoints",function(){reversePointsForReversedEdges(g)});time(" acyclic.undo",function(){acyclic.undo(g)})}function updateInputGraph(inputGraph,layoutGraph){_.each(inputGraph.nodes(),function(v){var inputLabel=inputGraph.node(v),layoutLabel=layoutGraph.node(v);if(inputLabel){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y;if(layoutGraph.children(v).length){inputLabel.width=layoutLabel.width;inputLabel.height=layoutLabel.height}}});_.each(inputGraph.edges(),function(e){var inputLabel=inputGraph.edge(e),layoutLabel=layoutGraph.edge(e);inputLabel.points=layoutLabel.points;if(_.has(layoutLabel,"x")){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y}});inputGraph.graph().width=layoutGraph.graph().width;inputGraph.graph().height=layoutGraph.graph().height}var graphNumAttrs=["nodesep","edgesep","ranksep","marginx","marginy"],graphDefaults={ranksep:50,edgesep:20,nodesep:50,rankdir:"tb"},graphAttrs=["acyclicer","ranker","rankdir","align"],nodeNumAttrs=["width","height"],nodeDefaults={width:0,height:0},edgeNumAttrs=["minlen","weight","width","height","labeloffset"],edgeDefaults={minlen:1,weight:1,width:0,height:0,labeloffset:10,labelpos:"r"},edgeAttrs=["labelpos"];function buildLayoutGraph(inputGraph){var g=new Graph({multigraph:true,compound:true}),graph=canonicalize(inputGraph.graph());g.setGraph(_.merge({},graphDefaults,selectNumberAttrs(graph,graphNumAttrs),_.pick(graph,graphAttrs)));_.each(inputGraph.nodes(),function(v){var node=canonicalize(inputGraph.node(v));g.setNode(v,_.defaults(selectNumberAttrs(node,nodeNumAttrs),nodeDefaults));g.setParent(v,inputGraph.parent(v))});_.each(inputGraph.edges(),function(e){var edge=canonicalize(inputGraph.edge(e));g.setEdge(e,_.merge({},edgeDefaults,selectNumberAttrs(edge,edgeNumAttrs),_.pick(edge,edgeAttrs)))});return g}function makeSpaceForEdgeLabels(g){var graph=g.graph();graph.ranksep/=2;_.each(g.edges(),function(e){var edge=g.edge(e);edge.minlen*=2;if(edge.labelpos.toLowerCase()!=="c"){if(graph.rankdir==="TB"||graph.rankdir==="BT"){edge.width+=edge.labeloffset}else{edge.height+=edge.labeloffset}}})}function injectEdgeLabelProxies(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.width&&edge.height){var v=g.node(e.v),w=g.node(e.w),label={rank:(w.rank-v.rank)/2+v.rank,e:e};util.addDummyNode(g,"edge-proxy",label,"_ep")}})}function assignRankMinMax(g){var maxRank=0;_.each(g.nodes(),function(v){var node=g.node(v);if(node.borderTop){node.minRank=g.node(node.borderTop).rank;node.maxRank=g.node(node.borderBottom).rank;maxRank=_.max(maxRank,node.maxRank)}});g.graph().maxRank=maxRank}function removeEdgeLabelProxies(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="edge-proxy"){g.edge(node.e).labelRank=node.rank;g.removeNode(v)}})}function translateGraph(g){var 
minX=Number.POSITIVE_INFINITY,maxX=0,minY=Number.POSITIVE_INFINITY,maxY=0,graphLabel=g.graph(),marginX=graphLabel.marginx||0,marginY=graphLabel.marginy||0;function getExtremes(attrs){var x=attrs.x,y=attrs.y,w=attrs.width,h=attrs.height;minX=Math.min(minX,x-w/2);maxX=Math.max(maxX,x+w/2);minY=Math.min(minY,y-h/2);maxY=Math.max(maxY,y+h/2)}_.each(g.nodes(),function(v){getExtremes(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){getExtremes(edge)}});minX-=marginX;minY-=marginY;_.each(g.nodes(),function(v){var node=g.node(v);node.x-=minX;node.y-=minY});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,function(p){p.x-=minX;p.y-=minY});if(_.has(edge,"x")){edge.x-=minX}if(_.has(edge,"y")){edge.y-=minY}});graphLabel.width=maxX-minX+marginX;graphLabel.height=maxY-minY+marginY}function assignNodeIntersects(g){_.each(g.edges(),function(e){var edge=g.edge(e),nodeV=g.node(e.v),nodeW=g.node(e.w),p1,p2;if(!edge.points){edge.points=[];p1=nodeW;p2=nodeV}else{p1=edge.points[0];p2=edge.points[edge.points.length-1]}edge.points.unshift(util.intersectRect(nodeV,p1));edge.points.push(util.intersectRect(nodeW,p2))})}function fixupEdgeLabelCoords(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){if(edge.labelpos==="l"||edge.labelpos==="r"){edge.width-=edge.labeloffset}switch(edge.labelpos){case"l":edge.x-=edge.width/2+edge.labeloffset;break;case"r":edge.x+=edge.width/2+edge.labeloffset;break}}})}function reversePointsForReversedEdges(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.reversed){edge.points.reverse()}})}function removeBorderNodes(g){_.each(g.nodes(),function(v){if(g.children(v).length){var node=g.node(v),t=g.node(node.borderTop),b=g.node(node.borderBottom),l=g.node(_.last(node.borderLeft)),r=g.node(_.last(node.borderRight));node.width=Math.abs(r.x-l.x);node.height=Math.abs(b.y-t.y);node.x=l.x+node.width/2;node.y=t.y+node.height/2}});_.each(g.nodes(),function(v){if(g.node(v).dummy==="border"){g.removeNode(v)}})}function removeSelfEdges(g){_.each(g.edges(),function(e){if(e.v===e.w){var node=g.node(e.v);if(!node.selfEdges){node.selfEdges=[]}node.selfEdges.push({e:e,label:g.edge(e)});g.removeEdge(e)}})}function insertSelfEdges(g){var layers=util.buildLayerMatrix(g);_.each(layers,function(layer){var orderShift=0;_.each(layer,function(v,i){var node=g.node(v);node.order=i+orderShift;_.each(node.selfEdges,function(selfEdge){util.addDummyNode(g,"selfedge",{width:selfEdge.label.width,height:selfEdge.label.height,rank:node.rank,order:i+ ++orderShift,e:selfEdge.e,label:selfEdge.label},"_se")});delete node.selfEdges})})}function positionSelfEdges(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="selfedge"){var selfNode=g.node(node.e.v),x=selfNode.x+selfNode.width/2,y=selfNode.y,dx=node.x-x,dy=selfNode.height/2;g.setEdge(node.e,node.label);g.removeNode(v);node.label.points=[{x:x+2*dx/3,y:y-dy},{x:x+5*dx/6,y:y-dy},{x:x+dx,y:y},{x:x+5*dx/6,y:y+dy},{x:x+2*dx/3,y:y+dy}];node.label.x=node.x;node.label.y=node.y}})}function selectNumberAttrs(obj,attrs){return _.mapValues(_.pick(obj,attrs),Number)}function canonicalize(attrs){var newAttrs={};_.each(attrs,function(v,k){newAttrs[k.toLowerCase()]=v});return 
newAttrs}},{"./acyclic":28,"./add-border-segments":29,"./coordinate-system":30,"./graphlib":33,"./lodash":36,"./nesting-graph":37,"./normalize":38,"./order":43,"./parent-dummy-chains":48,"./position":50,"./rank":52,"./util":55}],36:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],37:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports={run:run,cleanup:cleanup};function run(g){var root=util.addDummyNode(g,"root",{},"_root"),depths=treeDepths(g),height=_.max(depths)-1,nodeSep=2*height+1;g.graph().nestingRoot=root;_.each(g.edges(),function(e){g.edge(e).minlen*=nodeSep});var weight=sumWeights(g)+1;_.each(g.children(),function(child){dfs(g,root,nodeSep,weight,height,depths,child)});g.graph().nodeRankFactor=nodeSep}function dfs(g,root,nodeSep,weight,height,depths,v){var children=g.children(v);if(!children.length){if(v!==root){g.setEdge(root,v,{weight:0,minlen:nodeSep})}return}var top=util.addBorderNode(g,"_bt"),bottom=util.addBorderNode(g,"_bb"),label=g.node(v);g.setParent(top,v);label.borderTop=top;g.setParent(bottom,v);label.borderBottom=bottom;_.each(children,function(child){dfs(g,root,nodeSep,weight,height,depths,child);var childNode=g.node(child),childTop=childNode.borderTop?childNode.borderTop:child,childBottom=childNode.borderBottom?childNode.borderBottom:child,thisWeight=childNode.borderTop?weight:2*weight,minlen=childTop!==childBottom?1:height-depths[v]+1;g.setEdge(top,childTop,{weight:thisWeight,minlen:minlen,nestingEdge:true});g.setEdge(childBottom,bottom,{weight:thisWeight,minlen:minlen,nestingEdge:true})});if(!g.parent(v)){g.setEdge(root,top,{weight:0,minlen:height+depths[v]})}}function treeDepths(g){var depths={};function dfs(v,depth){var children=g.children(v);if(children&&children.length){_.each(children,function(child){dfs(child,depth+1)})}depths[v]=depth}_.each(g.children(),function(v){dfs(v,1)});return depths}function sumWeights(g){return _.reduce(g.edges(),function(acc,e){return acc+g.edge(e).weight},0)}function cleanup(g){var graphLabel=g.graph();g.removeNode(graphLabel.nestingRoot);delete graphLabel.nestingRoot;_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.nestingEdge){g.removeEdge(e)}})}},{"./lodash":36,"./util":55}],38:[function(require,module,exports){"use strict";var _=require("./lodash"),util=require("./util");module.exports={run:run,undo:undo};function run(g){g.graph().dummyChains=[];_.each(g.edges(),function(edge){normalizeEdge(g,edge)})}function normalizeEdge(g,e){var v=e.v,vRank=g.node(v).rank,w=e.w,wRank=g.node(w).rank,name=e.name,edgeLabel=g.edge(e),labelRank=edgeLabel.labelRank;if(wRank===vRank+1)return;g.removeEdge(e);var dummy,attrs,i;for(i=0,++vRank;vRank<wRank;++i,++vRank){edgeLabel.points=[];attrs={width:0,height:0,edgeLabel:edgeLabel,edgeObj:e,rank:vRank};dummy=util.addDummyNode(g,"edge",attrs,"_d");if(vRank===labelRank){attrs.width=edgeLabel.width;attrs.height=edgeLabel.height;attrs.dummy="edge-label";attrs.labelpos=edgeLabel.labelpos}g.setEdge(v,dummy,{weight:edgeLabel.weight},name);if(i===0){g.graph().dummyChains.push(dummy)}v=dummy}g.setEdge(v,w,{weight:edgeLabel.weight},name)}function undo(g){_.each(g.graph().dummyChains,function(v){var 
node=g.node(v),origLabel=node.edgeLabel,w;g.setEdge(node.edgeObj,origLabel);while(node.dummy){w=g.successors(v)[0];g.removeNode(v);origLabel.points.push({x:node.x,y:node.y});if(node.dummy==="edge-label"){origLabel.x=node.x;origLabel.y=node.y;origLabel.width=node.width;origLabel.height=node.height}v=w;node=g.node(v)}})}},{"./lodash":36,"./util":55}],39:[function(require,module,exports){var _=require("../lodash");module.exports=addSubgraphConstraints;function addSubgraphConstraints(g,cg,vs){var prev={},rootPrev;_.each(vs,function(v){var child=g.parent(v),parent,prevChild;while(child){parent=g.parent(child);if(parent){prevChild=prev[parent];prev[parent]=child}else{prevChild=rootPrev;rootPrev=child}if(prevChild&&prevChild!==child){cg.setEdge(prevChild,child);return}child=parent}})}},{"../lodash":36}],40:[function(require,module,exports){var _=require("../lodash");module.exports=barycenter;function barycenter(g,movable){return _.map(movable,function(v){var inV=g.inEdges(v);if(!inV.length){return{v:v}}else{var result=_.reduce(inV,function(acc,e){var edge=g.edge(e),nodeU=g.node(e.v);return{sum:acc.sum+edge.weight*nodeU.order,weight:acc.weight+edge.weight}},{sum:0,weight:0});return{v:v,barycenter:result.sum/result.weight,weight:result.weight}}})}},{"../lodash":36}],41:[function(require,module,exports){var _=require("../lodash"),Graph=require("../graphlib").Graph;module.exports=buildLayerGraph;function buildLayerGraph(g,rank,relationship){var root=createRootNode(g),result=new Graph({compound:true}).setGraph({root:root}).setDefaultNodeLabel(function(v){return g.node(v)});_.each(g.nodes(),function(v){var node=g.node(v),parent=g.parent(v);if(node.rank===rank||node.minRank<=rank&&rank<=node.maxRank){result.setNode(v);result.setParent(v,parent||root);_.each(g[relationship](v),function(e){var u=e.v===v?e.w:e.v,edge=result.edge(u,v),weight=!_.isUndefined(edge)?edge.weight:0;result.setEdge(u,v,{weight:g.edge(e).weight+weight})});if(_.has(node,"minRank")){result.setNode(v,{borderLeft:node.borderLeft[rank],borderRight:node.borderRight[rank]})}}});return result}function createRootNode(g){var v;while(g.hasNode(v=_.uniqueId("_root")));return v}},{"../graphlib":33,"../lodash":36}],42:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=crossCount;function crossCount(g,layering){var cc=0;for(var i=1;i<layering.length;++i){cc+=twoLayerCrossCount(g,layering[i-1],layering[i])}return cc}function twoLayerCrossCount(g,northLayer,southLayer){var southPos=_.zipObject(southLayer,_.map(southLayer,function(v,i){return i}));var southEntries=_.flatten(_.map(northLayer,function(v){return _.chain(g.outEdges(v)).map(function(e){return{pos:southPos[e.w],weight:g.edge(e).weight}}).sortBy("pos").value()}),true);var firstIndex=1;while(firstIndex<southLayer.length)firstIndex<<=1;var treeSize=2*firstIndex-1;firstIndex-=1;var tree=_.map(new Array(treeSize),function(){return 0});var cc=0;_.each(southEntries.forEach(function(entry){var index=entry.pos+firstIndex;tree[index]+=entry.weight;var weightSum=0;while(index>0){if(index%2){weightSum+=tree[index+1]}index=index-1>>1;tree[index]+=entry.weight}cc+=entry.weight*weightSum}));return cc}},{"../lodash":36}],43:[function(require,module,exports){"use strict";var 
_=require("../lodash"),initOrder=require("./init-order"),crossCount=require("./cross-count"),sortSubgraph=require("./sort-subgraph"),buildLayerGraph=require("./build-layer-graph"),addSubgraphConstraints=require("./add-subgraph-constraints"),Graph=require("../graphlib").Graph,util=require("../util");module.exports=order;function order(g){var maxRank=util.maxRank(g),downLayerGraphs=buildLayerGraphs(g,_.range(1,maxRank+1),"inEdges"),upLayerGraphs=buildLayerGraphs(g,_.range(maxRank-1,-1,-1),"outEdges");var layering=initOrder(g);assignOrder(g,layering);var bestCC=Number.POSITIVE_INFINITY,best;for(var i=0,lastBest=0;lastBest<4;++i,++lastBest){sweepLayerGraphs(i%2?downLayerGraphs:upLayerGraphs,i%4>=2);layering=util.buildLayerMatrix(g);var cc=crossCount(g,layering);if(cc<bestCC){lastBest=0;best=_.cloneDeep(layering);bestCC=cc}}assignOrder(g,best)}function buildLayerGraphs(g,ranks,relationship){return _.map(ranks,function(rank){return buildLayerGraph(g,rank,relationship)})}function sweepLayerGraphs(layerGraphs,biasRight){var cg=new Graph;_.each(layerGraphs,function(lg){var root=lg.graph().root;var sorted=sortSubgraph(lg,root,cg,biasRight);_.each(sorted.vs,function(v,i){lg.node(v).order=i});addSubgraphConstraints(lg,cg,sorted.vs)})}function assignOrder(g,layering){_.each(layering,function(layer){_.each(layer,function(v,i){g.node(v).order=i})})}},{"../graphlib":33,"../lodash":36,"../util":55,"./add-subgraph-constraints":39,"./build-layer-graph":41,"./cross-count":42,"./init-order":44,"./sort-subgraph":46}],44:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=initOrder;function initOrder(g){var visited={},simpleNodes=_.filter(g.nodes(),function(v){return!g.children(v).length}),maxRank=_.max(_.map(simpleNodes,function(v){return g.node(v).rank})),layers=_.map(_.range(maxRank+1),function(){return[]});function dfs(v){if(_.has(visited,v))return;visited[v]=true;var node=g.node(v);layers[node.rank].push(v);_.each(g.successors(v),dfs)}var orderedVs=_.sortBy(simpleNodes,function(v){return g.node(v).rank});_.each(orderedVs,dfs);return layers}},{"../lodash":36}],45:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=resolveConflicts;function resolveConflicts(entries,cg){var mappedEntries={};_.each(entries,function(entry,i){var tmp=mappedEntries[entry.v]={indegree:0,"in":[],out:[],vs:[entry.v],i:i};if(!_.isUndefined(entry.barycenter)){tmp.barycenter=entry.barycenter;tmp.weight=entry.weight}});_.each(cg.edges(),function(e){var entryV=mappedEntries[e.v],entryW=mappedEntries[e.w];if(!_.isUndefined(entryV)&&!_.isUndefined(entryW)){entryW.indegree++;entryV.out.push(mappedEntries[e.w])}});var sourceSet=_.filter(mappedEntries,function(entry){return!entry.indegree});return doResolveConflicts(sourceSet)}function doResolveConflicts(sourceSet){var entries=[];function handleIn(vEntry){return function(uEntry){if(uEntry.merged){return}if(_.isUndefined(uEntry.barycenter)||_.isUndefined(vEntry.barycenter)||uEntry.barycenter>=vEntry.barycenter){mergeEntries(vEntry,uEntry)}}}function handleOut(vEntry){return function(wEntry){wEntry["in"].push(vEntry);if(--wEntry.indegree===0){sourceSet.push(wEntry)}}}while(sourceSet.length){var entry=sourceSet.pop();entries.push(entry);_.each(entry["in"].reverse(),handleIn(entry));_.each(entry.out,handleOut(entry))}return _.chain(entries).filter(function(entry){return!entry.merged}).map(function(entry){return _.pick(entry,["vs","i","barycenter","weight"])}).value()}function mergeEntries(target,source){var 
sum=0,weight=0;if(target.weight){sum+=target.barycenter*target.weight;weight+=target.weight}if(source.weight){sum+=source.barycenter*source.weight;weight+=source.weight}target.vs=source.vs.concat(target.vs);target.barycenter=sum/weight;target.weight=weight;target.i=Math.min(source.i,target.i);source.merged=true}},{"../lodash":36}],46:[function(require,module,exports){var _=require("../lodash"),barycenter=require("./barycenter"),resolveConflicts=require("./resolve-conflicts"),sort=require("./sort");module.exports=sortSubgraph;function sortSubgraph(g,v,cg,biasRight){var movable=g.children(v),node=g.node(v),bl=node?node.borderLeft:undefined,br=node?node.borderRight:undefined,subgraphs={};if(bl){movable=_.filter(movable,function(w){return w!==bl&&w!==br})}var barycenters=barycenter(g,movable);_.each(barycenters,function(entry){if(g.children(entry.v).length){var subgraphResult=sortSubgraph(g,entry.v,cg,biasRight);subgraphs[entry.v]=subgraphResult;if(_.has(subgraphResult,"barycenter")){mergeBarycenters(entry,subgraphResult)}}});var entries=resolveConflicts(barycenters,cg);expandSubgraphs(entries,subgraphs);var result=sort(entries,biasRight);if(bl){result.vs=_.flatten([bl,result.vs,br],true);if(g.predecessors(bl).length){var blPred=g.node(g.predecessors(bl)[0]),brPred=g.node(g.predecessors(br)[0]);if(!_.has(result,"barycenter")){result.barycenter=0;result.weight=0}result.barycenter=(result.barycenter*result.weight+blPred.order+brPred.order)/(result.weight+2);result.weight+=2}}return result}function expandSubgraphs(entries,subgraphs){_.each(entries,function(entry){entry.vs=_.flatten(entry.vs.map(function(v){if(subgraphs[v]){return subgraphs[v].vs}return v}),true)})}function mergeBarycenters(target,other){if(!_.isUndefined(target.barycenter)){target.barycenter=(target.barycenter*target.weight+other.barycenter*other.weight)/(target.weight+other.weight);target.weight+=other.weight}else{target.barycenter=other.barycenter;target.weight=other.weight}}},{"../lodash":36,"./barycenter":40,"./resolve-conflicts":45,"./sort":47}],47:[function(require,module,exports){var _=require("../lodash"),util=require("../util");module.exports=sort;function sort(entries,biasRight){var parts=util.partition(entries,function(entry){return _.has(entry,"barycenter")});var sortable=parts.lhs,unsortable=_.sortBy(parts.rhs,function(entry){return-entry.i}),vs=[],sum=0,weight=0,vsIndex=0;sortable.sort(compareWithBias(!!biasRight));vsIndex=consumeUnsortable(vs,unsortable,vsIndex);_.each(sortable,function(entry){vsIndex+=entry.vs.length;vs.push(entry.vs);sum+=entry.barycenter*entry.weight;weight+=entry.weight;vsIndex=consumeUnsortable(vs,unsortable,vsIndex)});var result={vs:_.flatten(vs,true)};if(weight){result.barycenter=sum/weight;result.weight=weight}return result}function consumeUnsortable(vs,unsortable,index){var last;while(unsortable.length&&(last=_.last(unsortable)).i<=index){unsortable.pop();vs.push(last.vs);index++}return index}function compareWithBias(bias){return function(entryV,entryW){if(entryV.barycenter<entryW.barycenter){return-1}else if(entryV.barycenter>entryW.barycenter){return 1}return!bias?entryV.i-entryW.i:entryW.i-entryV.i}}},{"../lodash":36,"../util":55}],48:[function(require,module,exports){var _=require("./lodash");module.exports=parentDummyChains;function parentDummyChains(g){var postorderNums=postorder(g);_.each(g.graph().dummyChains,function(v){var 
node=g.node(v),edgeObj=node.edgeObj,pathData=findPath(g,postorderNums,edgeObj.v,edgeObj.w),path=pathData.path,lca=pathData.lca,pathIdx=0,pathV=path[pathIdx],ascending=true;while(v!==edgeObj.w){node=g.node(v);if(ascending){while((pathV=path[pathIdx])!==lca&&g.node(pathV).maxRank<node.rank){pathIdx++}if(pathV===lca){ascending=false}}if(!ascending){while(pathIdx<path.length-1&&g.node(pathV=path[pathIdx+1]).minRank<=node.rank){pathIdx++}pathV=path[pathIdx]}g.setParent(v,pathV);v=g.successors(v)[0]}})}function findPath(g,postorderNums,v,w){var vPath=[],wPath=[],low=Math.min(postorderNums[v].low,postorderNums[w].low),lim=Math.max(postorderNums[v].lim,postorderNums[w].lim),parent,lca;parent=v;do{parent=g.parent(parent);vPath.push(parent)}while(parent&&(postorderNums[parent].low>low||lim>postorderNums[parent].lim));lca=parent;parent=w;while((parent=g.parent(parent))!==lca){wPath.push(parent)}return{path:vPath.concat(wPath.reverse()),lca:lca}}function postorder(g){var result={},lim=0;function dfs(v){var low=lim;_.each(g.children(v),dfs);result[v]={low:low,lim:lim++}}_.each(g.children(),dfs);return result}},{"./lodash":36}],49:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,util=require("../util");module.exports={positionX:positionX,findType1Conflicts:findType1Conflicts,findType2Conflicts:findType2Conflicts,addConflict:addConflict,hasConflict:hasConflict,verticalAlignment:verticalAlignment,horizontalCompaction:horizontalCompaction,alignCoordinates:alignCoordinates,findSmallestWidthAlignment:findSmallestWidthAlignment,balance:balance};function findType1Conflicts(g,layering){var conflicts={};function visitLayer(prevLayer,layer){var k0=0,scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.each(layer,function(v,i){var w=findOtherInnerSegmentNode(g,v),k1=w?g.node(w).order:prevLayerLength;if(w||v===lastNode){_.each(layer.slice(scanPos,i+1),function(scanNode){_.each(g.predecessors(scanNode),function(u){var uLabel=g.node(u),uPos=uLabel.order;if((uPos<k0||k1<uPos)&&!(uLabel.dummy&&g.node(scanNode).dummy)){addConflict(conflicts,u,scanNode)}})});scanPos=i+1;k0=k1}});return layer}_.reduce(layering,visitLayer);return conflicts}function findType2Conflicts(g,layering){var conflicts={};function scan(south,southPos,southEnd,prevNorthBorder,nextNorthBorder){var v;_.each(_.range(southPos,southEnd),function(i){v=south[i];if(g.node(v).dummy){_.each(g.predecessors(v),function(u){var uNode=g.node(u);if(uNode.dummy&&(uNode.order<prevNorthBorder||uNode.order>nextNorthBorder)){addConflict(conflicts,u,v)}})}})}function visitLayer(north,south){var prevNorthPos=-1,nextNorthPos,southPos=0;_.each(south,function(v,southLookahead){if(g.node(v).dummy==="border"){var predecessors=g.predecessors(v);if(predecessors.length){nextNorthPos=g.node(predecessors[0]).order;scan(south,southPos,southLookahead,prevNorthPos,nextNorthPos);southPos=southLookahead;prevNorthPos=nextNorthPos}}scan(south,southPos,south.length,nextNorthPos,north.length)});return south}_.reduce(layering,visitLayer);return conflicts}function findOtherInnerSegmentNode(g,v){if(g.node(v).dummy){return _.find(g.predecessors(v),function(u){return g.node(u).dummy})}}function addConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}var conflictsV=conflicts[v];if(!conflictsV){conflicts[v]=conflictsV={}}conflictsV[w]=true}function hasConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}return _.has(conflicts[v],w)}function verticalAlignment(g,layering,conflicts,neighborFn){var 
root={},align={},pos={};_.each(layering,function(layer){_.each(layer,function(v,order){root[v]=v;align[v]=v;pos[v]=order})});_.each(layering,function(layer){var prevIdx=-1;_.each(layer,function(v){var ws=neighborFn(v);if(ws.length){ws=_.sortBy(ws,function(w){return pos[w]});var mp=(ws.length-1)/2;for(var i=Math.floor(mp),il=Math.ceil(mp);i<=il;++i){var w=ws[i];if(align[v]===v&&prevIdx<pos[w]&&!hasConflict(conflicts,v,w)){align[w]=v;align[v]=root[v]=root[w];prevIdx=pos[w]}}}})});return{root:root,align:align}}function horizontalCompaction(g,layering,root,align,reverseSep){var xs={},blockG=buildBlockGraph(g,layering,root,reverseSep);var visited={};function pass1(v){if(!_.has(visited,v)){visited[v]=true;xs[v]=_.reduce(blockG.inEdges(v),function(max,e){pass1(e.v);return Math.max(max,xs[e.v]+blockG.edge(e))},0)}}_.each(blockG.nodes(),pass1);function pass2(v){if(visited[v]!==2){visited[v]++;var min=_.reduce(blockG.outEdges(v),function(min,e){pass2(e.w);return Math.min(min,xs[e.w]-blockG.edge(e))},Number.POSITIVE_INFINITY);if(min!==Number.POSITIVE_INFINITY){xs[v]=Math.max(xs[v],min)}}}_.each(blockG.nodes(),pass2);_.each(align,function(v){xs[v]=xs[root[v]]});return xs}function buildBlockGraph(g,layering,root,reverseSep){var blockGraph=new Graph,graphLabel=g.graph(),sepFn=sep(graphLabel.nodesep,graphLabel.edgesep,reverseSep);_.each(layering,function(layer){var u;_.each(layer,function(v){var vRoot=root[v];blockGraph.setNode(vRoot);if(u){var uRoot=root[u],prevMax=blockGraph.edge(uRoot,vRoot);blockGraph.setEdge(uRoot,vRoot,Math.max(sepFn(g,v,u),prevMax||0))}u=v})});return blockGraph}function findSmallestWidthAlignment(g,xss){return _.min(xss,function(xs){var min=_.min(xs,function(x,v){return x-width(g,v)/2}),max=_.max(xs,function(x,v){return x+width(g,v)/2});return max-min})}function alignCoordinates(xss,alignTo){var alignToMin=_.min(alignTo),alignToMax=_.max(alignTo);_.each(["u","d"],function(vert){_.each(["l","r"],function(horiz){var alignment=vert+horiz,xs=xss[alignment],delta;if(xs===alignTo)return;delta=horiz==="l"?alignToMin-_.min(xs):alignToMax-_.max(xs);if(delta){xss[alignment]=_.mapValues(xs,function(x){return x+delta})}})})}function balance(xss,align){return _.mapValues(xss.ul,function(ignore,v){if(align){return xss[align.toLowerCase()][v]}else{var xs=_.sortBy(_.pluck(xss,v));return(xs[1]+xs[2])/2}})}function positionX(g){var layering=util.buildLayerMatrix(g),conflicts=_.merge(findType1Conflicts(g,layering),findType2Conflicts(g,layering));var xss={},adjustedLayering;_.each(["u","d"],function(vert){adjustedLayering=vert==="u"?layering:_.values(layering).reverse();_.each(["l","r"],function(horiz){if(horiz==="r"){adjustedLayering=_.map(adjustedLayering,function(inner){return _.values(inner).reverse()})}var neighborFn=_.bind(vert==="u"?g.predecessors:g.successors,g);var align=verticalAlignment(g,adjustedLayering,conflicts,neighborFn);var xs=horizontalCompaction(g,adjustedLayering,align.root,align.align,horiz==="r");if(horiz==="r"){xs=_.mapValues(xs,function(x){return-x})}xss[vert+horiz]=xs})});var smallestWidth=findSmallestWidthAlignment(g,xss);alignCoordinates(xss,smallestWidth);return balance(xss,g.graph().align)}function sep(nodeSep,edgeSep,reverseSep){return function(g,v,w){var 
vLabel=g.node(v),wLabel=g.node(w),sum=0,delta;sum+=vLabel.width/2;if(_.has(vLabel,"labelpos")){switch(vLabel.labelpos.toLowerCase()){case"l":delta=-vLabel.width/2;break;case"r":delta=vLabel.width/2;break}}if(delta){sum+=reverseSep?delta:-delta}delta=0;sum+=(vLabel.dummy?edgeSep:nodeSep)/2;sum+=(wLabel.dummy?edgeSep:nodeSep)/2;sum+=wLabel.width/2;if(_.has(wLabel,"labelpos")){switch(wLabel.labelpos.toLowerCase()){case"l":delta=wLabel.width/2;break;case"r":delta=-wLabel.width/2;break}}if(delta){sum+=reverseSep?delta:-delta}delta=0;return sum}}function width(g,v){return g.node(v).width}},{"../graphlib":33,"../lodash":36,"../util":55}],50:[function(require,module,exports){"use strict";var _=require("../lodash"),util=require("../util"),positionX=require("./bk").positionX;module.exports=position;function position(g){g=util.asNonCompoundGraph(g);positionY(g);_.each(positionX(g),function(x,v){g.node(v).x=x})}function positionY(g){var layering=util.buildLayerMatrix(g),rankSep=g.graph().ranksep,prevY=0;_.each(layering,function(layer){var maxHeight=_.max(_.map(layer,function(v){return g.node(v).height}));_.each(layer,function(v){g.node(v).y=prevY+maxHeight/2});prevY+=maxHeight+rankSep})}},{"../lodash":36,"../util":55,"./bk":49}],51:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,slack=require("./util").slack;module.exports=feasibleTree;function feasibleTree(g){var t=new Graph({directed:false});var start=g.nodes()[0],size=g.nodeCount();t.setNode(start,{});var edge,delta;while(tightTree(t,g)<size){edge=findMinSlackEdge(t,g);delta=t.hasNode(edge.v)?slack(g,edge):-slack(g,edge);shiftRanks(t,g,delta)}return t}function tightTree(t,g){function dfs(v){_.each(g.nodeEdges(v),function(e){var edgeV=e.v,w=v===edgeV?e.w:edgeV;if(!t.hasNode(w)&&!slack(g,e)){t.setNode(w,{});t.setEdge(v,w,{});dfs(w)}})}_.each(t.nodes(),dfs);return t.nodeCount()}function findMinSlackEdge(t,g){return _.min(g.edges(),function(e){if(t.hasNode(e.v)!==t.hasNode(e.w)){return slack(g,e)}})}function shiftRanks(t,g,delta){_.each(t.nodes(),function(v){g.node(v).rank+=delta})}},{"../graphlib":33,"../lodash":36,"./util":54}],52:[function(require,module,exports){"use strict";var rankUtil=require("./util"),longestPath=rankUtil.longestPath,feasibleTree=require("./feasible-tree"),networkSimplex=require("./network-simplex");module.exports=rank;function rank(g){switch(g.graph().ranker){case"network-simplex":networkSimplexRanker(g);break;case"tight-tree":tightTreeRanker(g);break;case"longest-path":longestPathRanker(g);break;default:networkSimplexRanker(g)}}var longestPathRanker=longestPath;function tightTreeRanker(g){longestPath(g);feasibleTree(g)}function networkSimplexRanker(g){networkSimplex(g)}},{"./feasible-tree":51,"./network-simplex":53,"./util":54}],53:[function(require,module,exports){"use strict";var _=require("../lodash"),feasibleTree=require("./feasible-tree"),slack=require("./util").slack,initRank=require("./util").longestPath,preorder=require("../graphlib").alg.preorder,postorder=require("../graphlib").alg.postorder,simplify=require("../util").simplify;module.exports=networkSimplex;networkSimplex.initLowLimValues=initLowLimValues;networkSimplex.initCutValues=initCutValues;networkSimplex.calcCutValue=calcCutValue;networkSimplex.leaveEdge=leaveEdge;networkSimplex.enterEdge=enterEdge;networkSimplex.exchangeEdges=exchangeEdges;function networkSimplex(g){g=simplify(g);initRank(g);var t=feasibleTree(g);initLowLimValues(t);initCutValues(t,g);var 
e,f;while(e=leaveEdge(t)){f=enterEdge(t,g,e);exchangeEdges(t,g,e,f)}}function initCutValues(t,g){var vs=postorder(t,t.nodes());vs=vs.slice(0,vs.length-1);_.each(vs,function(v){assignCutValue(t,g,v)})}function assignCutValue(t,g,child){var childLab=t.node(child),parent=childLab.parent;t.edge(child,parent).cutvalue=calcCutValue(t,g,child)}function calcCutValue(t,g,child){var childLab=t.node(child),parent=childLab.parent,childIsTail=true,graphEdge=g.edge(child,parent),cutValue=0;if(!graphEdge){childIsTail=false;graphEdge=g.edge(parent,child)}cutValue=graphEdge.weight;_.each(g.nodeEdges(child),function(e){var isOutEdge=e.v===child,other=isOutEdge?e.w:e.v;if(other!==parent){var pointsToHead=isOutEdge===childIsTail,otherWeight=g.edge(e).weight;cutValue+=pointsToHead?otherWeight:-otherWeight;if(isTreeEdge(t,child,other)){var otherCutValue=t.edge(child,other).cutvalue;cutValue+=pointsToHead?-otherCutValue:otherCutValue}}});return cutValue}function initLowLimValues(tree,root){if(arguments.length<2){root=tree.nodes()[0]}dfsAssignLowLim(tree,{},1,root)}function dfsAssignLowLim(tree,visited,nextLim,v,parent){var low=nextLim,label=tree.node(v);visited[v]=true;_.each(tree.neighbors(v),function(w){if(!_.has(visited,w)){nextLim=dfsAssignLowLim(tree,visited,nextLim,w,v)}});label.low=low;label.lim=nextLim++;if(parent){label.parent=parent}else{delete label.parent}return nextLim}function leaveEdge(tree){return _.find(tree.edges(),function(e){return tree.edge(e).cutvalue<0})}function enterEdge(t,g,edge){var v=edge.v,w=edge.w; if(!g.hasEdge(v,w)){v=edge.w;w=edge.v}var vLabel=t.node(v),wLabel=t.node(w),tailLabel=vLabel,flip=false;if(vLabel.lim>wLabel.lim){tailLabel=wLabel;flip=true}var candidates=_.filter(g.edges(),function(edge){return flip===isDescendant(t,t.node(edge.v),tailLabel)&&flip!==isDescendant(t,t.node(edge.w),tailLabel)});return _.min(candidates,function(edge){return slack(g,edge)})}function exchangeEdges(t,g,e,f){var v=e.v,w=e.w;t.removeEdge(v,w);t.setEdge(f.v,f.w,{});initLowLimValues(t);initCutValues(t,g);updateRanks(t,g)}function updateRanks(t,g){var root=_.find(t.nodes(),function(v){return!g.node(v).parent}),vs=preorder(t,root);vs=vs.slice(1);_.each(vs,function(v){var parent=t.node(v).parent,edge=g.edge(v,parent),flipped=false;if(!edge){edge=g.edge(parent,v);flipped=true}g.node(v).rank=g.node(parent).rank+(flipped?edge.minlen:-edge.minlen)})}function isTreeEdge(tree,u,v){return tree.hasEdge(u,v)}function isDescendant(tree,vLabel,rootLabel){return rootLabel.low<=vLabel.lim&&vLabel.lim<=rootLabel.lim}},{"../graphlib":33,"../lodash":36,"../util":55,"./feasible-tree":51,"./util":54}],54:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports={longestPath:longestPath,slack:slack};function longestPath(g){var visited={};function dfs(v){var label=g.node(v);if(_.has(visited,v)){return label.rank}visited[v]=true;var rank=_.min(_.map(g.outEdges(v),function(e){return dfs(e.w)-g.edge(e).minlen}));if(rank===Number.POSITIVE_INFINITY){rank=0}return label.rank=rank}_.each(g.sources(),dfs)}function slack(g,e){return g.node(e.w).rank-g.node(e.v).rank-g.edge(e).minlen}},{"../lodash":36}],55:[function(require,module,exports){"use strict";var 
_=require("./lodash"),Graph=require("./graphlib").Graph;module.exports={addDummyNode:addDummyNode,simplify:simplify,asNonCompoundGraph:asNonCompoundGraph,successorWeights:successorWeights,predecessorWeights:predecessorWeights,intersectRect:intersectRect,buildLayerMatrix:buildLayerMatrix,normalizeRanks:normalizeRanks,removeEmptyRanks:removeEmptyRanks,addBorderNode:addBorderNode,maxRank:maxRank,partition:partition,time:time,notime:notime};function addDummyNode(g,type,attrs,name){var v;do{v=_.uniqueId(name)}while(g.hasNode(v));attrs.dummy=type;g.setNode(v,attrs);return v}function simplify(g){var simplified=(new Graph).setGraph(g.graph());_.each(g.nodes(),function(v){simplified.setNode(v,g.node(v))});_.each(g.edges(),function(e){var simpleLabel=simplified.edge(e.v,e.w)||{weight:0,minlen:1},label=g.edge(e);simplified.setEdge(e.v,e.w,{weight:simpleLabel.weight+label.weight,minlen:Math.max(simpleLabel.minlen,label.minlen)})});return simplified}function asNonCompoundGraph(g){var simplified=new Graph({multigraph:g.isMultigraph()}).setGraph(g.graph());_.each(g.nodes(),function(v){if(!g.children(v).length){simplified.setNode(v,g.node(v))}});_.each(g.edges(),function(e){simplified.setEdge(e,g.edge(e))});return simplified}function successorWeights(g){var weightMap=_.map(g.nodes(),function(v){var sucs={};_.each(g.outEdges(v),function(e){sucs[e.w]=(sucs[e.w]||0)+g.edge(e).weight});return sucs});return _.zipObject(g.nodes(),weightMap)}function predecessorWeights(g){var weightMap=_.map(g.nodes(),function(v){var preds={};_.each(g.inEdges(v),function(e){preds[e.v]=(preds[e.v]||0)+g.edge(e).weight});return preds});return _.zipObject(g.nodes(),weightMap)}function intersectRect(rect,point){var x=rect.x;var y=rect.y;var dx=point.x-x;var dy=point.y-y;var w=rect.width/2;var h=rect.height/2;if(!dx&&!dy){throw new Error("Not possible to find intersection inside of the rectangle")}var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=w*dy/dx}return{x:x+sx,y:y+sy}}function buildLayerMatrix(g){var layering=_.map(_.range(maxRank(g)+1),function(){return[]});_.each(g.nodes(),function(v){var node=g.node(v),rank=node.rank;if(!_.isUndefined(rank)){layering[rank][node.order]=v}});return layering}function normalizeRanks(g){var min=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"rank")){node.rank-=min}})}function removeEmptyRanks(g){var offset=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));var layers=[];_.each(g.nodes(),function(v){var rank=g.node(v).rank-offset;if(!_.has(layers,rank)){layers[rank]=[]}layers[rank].push(v)});var delta=0,nodeRankFactor=g.graph().nodeRankFactor;_.each(layers,function(vs,i){if(_.isUndefined(vs)&&i%nodeRankFactor!==0){--delta}else if(delta){_.each(vs,function(v){g.node(v).rank+=delta})}})}function addBorderNode(g,prefix,rank,order){var node={width:0,height:0};if(arguments.length>=4){node.rank=rank;node.order=order}return addDummyNode(g,"border",node,prefix)}function maxRank(g){return _.max(_.map(g.nodes(),function(v){var rank=g.node(v).rank;if(!_.isUndefined(rank)){return rank}}))}function partition(collection,fn){var result={lhs:[],rhs:[]};_.each(collection,function(value){if(fn(value)){result.lhs.push(value)}else{result.rhs.push(value)}});return result}function time(name,fn){var start=_.now();try{return fn()}finally{console.log(name+" time: "+(_.now()-start)+"ms")}}function notime(name,fn){return 
fn()}},{"./graphlib":33,"./lodash":36}],56:[function(require,module,exports){module.exports="0.7.1"},{}],57:[function(require,module,exports){var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":73,"./lib/alg":64,"./lib/json":74}],58:[function(require,module,exports){var _=require("../lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{"../lodash":75}],59:[function(require,module,exports){var _=require("../lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{"../lodash":75}],60:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("../lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"../lodash":75,"./dijkstra":61}],61:[function(require,module,exports){var _=require("../lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. 
"+"Bad edge: "+edge+" Weight: "+weight)}if(distance<wEntry.distance){wEntry.distance=distance;wEntry.predecessor=v;pq.decrease(w,distance)}};g.nodes().forEach(function(v){var distance=v===source?0:Number.POSITIVE_INFINITY;results[v]={distance:distance};pq.add(v,distance)});while(pq.size()>0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":71,"../lodash":75}],62:[function(require,module,exports){var _=require("../lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"../lodash":75,"./tarjan":69}],63:[function(require,module,exports){var _=require("../lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance<ij.distance){ij.distance=altDistance;ij.predecessor=kj.predecessor}})})});return results}},{"../lodash":75}],64:[function(require,module,exports){module.exports={components:require("./components"),dijkstra:require("./dijkstra"),dijkstraAll:require("./dijkstra-all"),findCycles:require("./find-cycles"),floydWarshall:require("./floyd-warshall"),isAcyclic:require("./is-acyclic"),postorder:require("./postorder"),preorder:require("./preorder"),prim:require("./prim"),tarjan:require("./tarjan"),topsort:require("./topsort")}},{"./components":58,"./dijkstra":61,"./dijkstra-all":60,"./find-cycles":62,"./floyd-warshall":63,"./is-acyclic":65,"./postorder":66,"./preorder":67,"./prim":68,"./tarjan":69,"./topsort":70}],65:[function(require,module,exports){var topsort=require("./topsort");module.exports=isAcyclic;function isAcyclic(g){try{topsort(g)}catch(e){if(e instanceof topsort.CycleException){return false}throw e}return true}},{"./topsort":70}],66:[function(require,module,exports){var dfs=require("./dfs");module.exports=postorder;function postorder(g,vs){return dfs(g,vs,"post")}},{"./dfs":59}],67:[function(require,module,exports){var dfs=require("./dfs");module.exports=preorder;function preorder(g,vs){return dfs(g,vs,"pre")}},{"./dfs":59}],68:[function(require,module,exports){var _=require("../lodash"),Graph=require("../graph"),PriorityQueue=require("../data/priority-queue");module.exports=prim;function prim(g,weightFunc){var result=new Graph,parents={},pq=new PriorityQueue,v;function updateNeighbors(edge){var w=edge.v===v?edge.w:edge.v,pri=pq.priority(w);if(pri!==undefined){var edgeWeight=weightFunc(edge);if(edgeWeight<pri){parents[w]=v;pq.decrease(w,edgeWeight)}}}if(g.nodeCount()===0){return result}_.each(g.nodes(),function(v){pq.add(v,Number.POSITIVE_INFINITY);result.setNode(v)});pq.decrease(g.nodes()[0],0);var init=false;while(pq.size()>0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not 
connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":71,"../graph":72,"../lodash":75}],69:[function(require,module,exports){var _=require("../lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{"../lodash":75}],70:[function(require,module,exports){var _=require("../lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{"../lodash":75}],71:[function(require,module,exports){var _=require("../lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. 
"+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l<arr.length){largest=arr[l].priority<arr[largest].priority?l:largest;if(r<arr.length){largest=arr[r].priority<arr[largest].priority?r:largest}if(largest!==i){this._swap(i,largest);this._heapify(largest)}}};PriorityQueue.prototype._decrease=function(index){var arr=this._arr;var priority=arr[index].priority;var parent;while(index!==0){parent=index>>1;if(arr[parent].priority<priority){break}this._swap(index,parent);index=parent}};PriorityQueue.prototype._swap=function(i,j){var arr=this._arr;var keyIndices=this._keyIndices;var origArrI=arr[i];var origArrJ=arr[j];arr[i]=origArrJ;arr[j]=origArrI;keyIndices[origArrJ.key]=i;keyIndices[origArrI.key]=j}},{"../lodash":75}],72:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports=Graph;var DEFAULT_EDGE_NAME="\x00",GRAPH_NODE="\x00",EDGE_KEY_DELIM="";function Graph(opts){this._isDirected=_.has(opts,"directed")?opts.directed:true;this._isMultigraph=_.has(opts,"multigraph")?opts.multigraph:false;this._isCompound=_.has(opts,"compound")?opts.compound:false;this._label=undefined;this._defaultNodeLabelFn=_.constant(undefined);this._defaultEdgeLabelFn=_.constant(undefined);this._nodes={};if(this._isCompound){this._parent={};this._children={};this._children[GRAPH_NODE]={}}this._in={};this._preds={};this._out={};this._sucs={};this._edgeObjs={};this._edgeLabels={}}Graph.prototype._nodeCount=0;Graph.prototype._edgeCount=0;Graph.prototype.isDirected=function(){return this._isDirected};Graph.prototype.isMultigraph=function(){return this._isMultigraph};Graph.prototype.isCompound=function(){return this._isCompound};Graph.prototype.setGraph=function(label){this._label=label;return this};Graph.prototype.graph=function(){return this._label};Graph.prototype.setDefaultNodeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultNodeLabelFn=newDefault;return this};Graph.prototype.nodeCount=function(){return this._nodeCount};Graph.prototype.nodes=function(){return _.keys(this._nodes)};Graph.prototype.sources=function(){return _.filter(this.nodes(),function(v){return _.isEmpty(this._in[v])},this)};Graph.prototype.sinks=function(){return _.filter(this.nodes(),function(v){return _.isEmpty(this._out[v])},this)};Graph.prototype.setNodes=function(vs,value){var args=arguments;_.each(vs,function(v){if(args.length>1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete 
this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return _.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(){var v,w,name,value,valueSpecified=false;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arguments[0];w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(!_.isUndefined(name)){name=""+name}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete 
this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return _.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{"./lodash":75}],73:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":72,"./version":76}],74:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":72,"./lodash":75}],75:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],76:[function(require,module,exports){module.exports="1.0.1"},{}],77:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f \ufeff"+"\n\r\u2028\u2029"+" ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var 
contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index<length){if(array[index]===value){return index}}return-1}function cacheIndexOf(cache,value){var type=typeof value;cache=cache.cache;if(type=="boolean"||value==null){return cache[value]?0:-1}if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value;cache=(cache=cache[type])&&cache[key];return type=="object"?cache&&baseIndexOf(cache,value)>-1?0:-1:cache?0:-1}function cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++index<length){var value=ac[index],other=bc[index];if(value!==other){if(value>other||typeof value=="undefined"){return 1}if(value<other||typeof other=="undefined"){return-1}}}return a.index-b.index}function createCache(array){var index=-1,length=array.length,first=array[0],mid=array[length/2|0],last=array[length-1];if(first&&typeof first=="object"&&mid&&typeof mid=="object"&&last&&typeof last=="object"){return false}var cache=getObject();cache["false"]=cache["null"]=cache["true"]=cache["undefined"]=false;var result=getObject();result.array=array;result.cache=cache;result.push=cachePush;while(++index<length){result.push(array[index])}return result}function escapeStringChar(match){return"\\"+stringEscapes[match]}function getArray(){return arrayPool.pop()||[]}function getObject(){return objectPool.pop()||{array:null,cache:null,criteria:null,"false":false,index:0,"null":false,number:null,object:null,push:null,string:null,"true":false,undefined:false,value:null}}function 
releaseArray(array){array.length=0;if(arrayPool.length<maxPoolSize){arrayPool.push(array)}}function releaseObject(object){var cache=object.cache;if(cache){releaseObject(cache)}object.array=object.cache=object.criteria=object.object=object.number=object.string=object.value=null;if(objectPool.length<maxPoolSize){objectPool.push(object)}}function slice(array,start,end){start||(start=0);if(typeof end=="undefined"){end=array?array.length:0}var index=-1,length=end-start||0,result=Array(length<0?0:length);while(++index<length){result[index]=array[start+index]}return result}function runInContext(context){context=context?_.defaults(root.Object(),context,_.pick(root,contextProps)):root;var Array=context.Array,Boolean=context.Boolean,Date=context.Date,Function=context.Function,Math=context.Math,Number=context.Number,Object=context.Object,RegExp=context.RegExp,String=context.String,TypeError=context.TypeError;var arrayRef=[];var objectProto=Object.prototype;var oldDash=context._;var toString=objectProto.toString;var reNative=RegExp("^"+String(toString).replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/toString| for [^\]]+/g,".*?")+"$");var ceil=Math.ceil,clearTimeout=context.clearTimeout,floor=Math.floor,fnToString=Function.prototype.toString,getPrototypeOf=isNative(getPrototypeOf=Object.getPrototypeOf)&&getPrototypeOf,hasOwnProperty=objectProto.hasOwnProperty,push=arrayRef.push,setTimeout=context.setTimeout,splice=arrayRef.splice,unshift=arrayRef.unshift;var defineProperty=function(){try{var o={},func=isNative(func=Object.defineProperty)&&func,result=func(o,o,o)&&func}catch(e){}return result}();var nativeCreate=isNative(nativeCreate=Object.create)&&nativeCreate,nativeIsArray=isNative(nativeIsArray=Array.isArray)&&nativeIsArray,nativeIsFinite=context.isFinite,nativeIsNaN=context.isNaN,nativeKeys=isNative(nativeKeys=Object.keys)&&nativeKeys,nativeMax=Math.max,nativeMin=Math.min,nativeParseInt=context.parseInt,nativeRandom=Math.random;var ctorByClass={};ctorByClass[arrayClass]=Array;ctorByClass[boolClass]=Boolean;ctorByClass[dateClass]=Date;ctorByClass[funcClass]=Function;ctorByClass[objectClass]=Object;ctorByClass[numberClass]=Number;ctorByClass[regexpClass]=RegExp;ctorByClass[stringClass]=String;function lodash(value){return value&&typeof value=="object"&&!isArray(value)&&hasOwnProperty.call(value,"__wrapped__")?value:new lodashWrapper(value)}function lodashWrapper(value,chainAll){this.__chain__=!!chainAll;this.__wrapped__=value}lodashWrapper.prototype=lodash.prototype;var support=lodash.support={};support.funcDecomp=!isNative(context.WinRTError)&&reThis.test(runInContext);support.funcNames=typeof Function.name=="string";lodash.templateSettings={escape:/<%-([\s\S]+?)%>/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case 
dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return isObject(prototype)?nativeCreate(prototype):{}; diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css index 18c72694f3e2d..1846acb742b98 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css @@ -57,7 +57,7 @@ stroke-width: 1px; } -#dag-viz-graph svg.job g.cluster[id*="stage"] rect { +#dag-viz-graph svg.job g.cluster[class*="stage"] rect { fill: #FFFFFF; stroke: #FF99AC; stroke-width: 1px; @@ -79,7 +79,7 @@ stroke-width: 1px; } -#dag-viz-graph svg.stage g.cluster[id*="stage"] rect { +#dag-viz-graph svg.stage g.cluster[class*="stage"] rect { fill: #FFFFFF; stroke: #FFA6B6; stroke-width: 1px; diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index f7d0d3c61457c..e2ec00b9c3c0d 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -108,7 +108,7 @@ function toggleDagViz(forJob) { * Output DOM hierarchy: * div#dag-viz-graph > * svg > - * g#cluster_stage_[stageId] + * g.cluster_stage_[stageId] * * Note that the input metadata is populated by o.a.s.ui.UIUtils.showDagViz. * Any changes in the input format here must be reflected there. @@ -137,7 +137,7 @@ function renderDagViz(forJob) { // Find cached RDDs and mark them as such metadataContainer().selectAll(".cached-rdd").each(function(v) { var nodeId = VizConstants.nodePrefix + d3.select(this).text(); - svg.selectAll("#" + nodeId).classed("cached", true); + svg.selectAll("g." 
+ nodeId).classed("cached", true); }); resizeSvg(svg); @@ -192,14 +192,10 @@ function renderDagVizForJob(svgContainer) { if (i > 0) { var existingStages = svgContainer .selectAll("g.cluster") - .filter("[id*=\"" + VizConstants.stageClusterPrefix + "\"]"); + .filter("[class*=\"" + VizConstants.stageClusterPrefix + "\"]"); if (!existingStages.empty()) { var lastStage = d3.select(existingStages[0].pop()); - var lastStageId = lastStage.attr("id"); - var lastStageWidth = toFloat(svgContainer - .select("#" + lastStageId) - .select("rect") - .attr("width")); + var lastStageWidth = toFloat(lastStage.select("rect").attr("width")); var lastStagePosition = getAbsolutePosition(lastStage); var offset = lastStagePosition.x + lastStageWidth + VizConstants.stageSep; container.attr("transform", "translate(" + offset + ", 0)"); @@ -372,14 +368,14 @@ function getAbsolutePosition(d3selection) { function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) { var fromNodeId = VizConstants.nodePrefix + fromRDDId; var toNodeId = VizConstants.nodePrefix + toRDDId; - var fromPos = getAbsolutePosition(svgContainer.select("#" + fromNodeId)); - var toPos = getAbsolutePosition(svgContainer.select("#" + toNodeId)); + var fromPos = getAbsolutePosition(svgContainer.select("g." + fromNodeId)); + var toPos = getAbsolutePosition(svgContainer.select("g." + toNodeId)); // On the job page, RDDs are rendered as dots (circles). When rendering the path, // we need to account for the radii of these circles. Otherwise the arrow heads // will bleed into the circle itself. var delta = toFloat(svgContainer - .select("g.node#" + toNodeId) + .select("g.node." + toNodeId) .select("circle") .attr("r")); if (fromPos.x < toPos.x) { @@ -431,10 +427,35 @@ function addTooltipsForRDDs(svgContainer) { node.select("circle") .attr("data-toggle", "tooltip") .attr("data-placement", "bottom") - .attr("title", tooltipText) + .attr("title", tooltipText); } + // Link tooltips for all nodes that belong to the same RDD + node.on("mouseenter", function() { triggerTooltipForRDD(node, true); }); + node.on("mouseleave", function() { triggerTooltipForRDD(node, false); }); }); - $("[data-toggle=tooltip]").tooltip({container: "body"}); + + $("[data-toggle=tooltip]") + .filter("g.node circle") + .tooltip({ container: "body", trigger: "manual" }); +} + +/* + * (Job page only) Helper function to show or hide tooltips for all nodes + * in the graph that refer to the same RDD the specified node represents. + */ +function triggerTooltipForRDD(d3node, show) { + var classes = d3node.node().classList; + for (var i = 0; i < classes.length; i++) { + var clazz = classes[i]; + var isRDDClass = clazz.indexOf(VizConstants.nodePrefix) == 0; + if (isRDDClass) { + graphContainer().selectAll("g." + clazz).each(function() { + var circle = d3.select(this).select("circle").node(); + var showOrHide = show ? "show" : "hide"; + $(circle).tooltip(showOrHide); + }); + } + } } /* Helper function to convert attributes to numeric values. */ From aa1837875a3febad2f22b91a294f91749852b42f Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Wed, 13 May 2015 16:29:52 -0700 Subject: [PATCH 155/320] [SPARK-7502] DAG visualization: gracefully handle removed stages Old stages are removed without much feedback to the user. This happens very often in streaming. See screenshots below for more detail. 
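The remedy the patch itself points to is configuration: the new empty-DAG message in the JavaScript diff below tells users to raise `spark.ui.retainedJobs` and `spark.ui.retainedStages` so that metadata for older jobs and stages survives longer. As a rough sketch of what that looks like on the driver side (the application name and the values are illustrative assumptions, not part of this patch):

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Sketch only: retain more job/stage UI metadata so DAG visualizations for older
// stages are less likely to be cleaned up. The numbers are illustrative, not
// recommendations; larger values keep more state in driver memory.
val conf = new SparkConf()
  .setAppName("dag-viz-retention-sketch")   // hypothetical app name
  .set("spark.ui.retainedJobs", "2000")
  .set("spark.ui.retainedStages", "2000")
val sc = new SparkContext(conf)
```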
zsxwing **Before** <img src="https://cloud.githubusercontent.com/assets/2133137/7621031/643cc1e0-f978-11e4-8f42-09decaac44a7.png" width="500px"/> ------------------------- **After** <img src="https://cloud.githubusercontent.com/assets/2133137/7621037/6e37348c-f978-11e4-84a5-e44e154f9b13.png" width="400px"/> Author: Andrew Or <andrew@databricks.com> Closes #6132 from andrewor14/dag-viz-remove-gracefully and squashes the following commits: 43175cd [Andrew Or] Handle removed jobs and stages gracefully --- .../apache/spark/ui/static/spark-dag-viz.css | 4 ++++ .../apache/spark/ui/static/spark-dag-viz.js | 18 +++++++++++++----- .../ui/scope/RDDOperationGraphListener.scala | 11 ++++++++--- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css index 1846acb742b98..eedefb44b96fc 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css @@ -44,6 +44,10 @@ stroke-width: 1px; } +#dag-viz-graph div#empty-dag-viz-message { + margin: 15px; +} + /* Job page specific styles */ #dag-viz-graph svg.job marker#marker-arrow path { diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index e2ec00b9c3c0d..8138eb0d4f390 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -86,7 +86,7 @@ function toggleDagViz(forJob) { $(arrowSelector).toggleClass('arrow-open'); var shouldShow = $(arrowSelector).hasClass("arrow-open"); if (shouldShow) { - var shouldRender = graphContainer().select("svg").empty(); + var shouldRender = graphContainer().select("*").empty(); if (shouldRender) { renderDagViz(forJob); } @@ -117,10 +117,18 @@ function renderDagViz(forJob) { // If there is not a dot file to render, fail fast and report error var jobOrStage = forJob ? "job" : "stage"; - if (metadataContainer().empty()) { - graphContainer() - .append("div") - .text("No visualization information available for this " + jobOrStage); + if (metadataContainer().empty() || + metadataContainer().selectAll("div").empty()) { + var message = + "<b>No visualization information available for this " + jobOrStage + "!</b><br/>" + + "If this is an old " + jobOrStage + ", its visualization metadata may have been " + + "cleaned up over time.<br/> You may consider increasing the value of "; + if (forJob) { + message += "<i>spark.ui.retainedJobs</i> and <i>spark.ui.retainedStages</i>."; + } else { + message += "<i>spark.ui.retainedStages</i>"; + } + graphContainer().append("div").attr("id", "empty-dag-viz-message").html(message); return; } diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala index f0f7007d77a14..3b77a1e12cc45 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala @@ -42,9 +42,14 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen /** Return the graph metadata for the given stage, or None if no such information exists. 
*/ def getOperationGraphForJob(jobId: Int): Seq[RDDOperationGraph] = { - jobIdToStageIds.get(jobId) - .map { sids => sids.flatMap { sid => stageIdToGraph.get(sid) } } - .getOrElse { Seq.empty } + val stageIds = jobIdToStageIds.get(jobId).getOrElse { Seq.empty } + val graphs = stageIds.flatMap { sid => stageIdToGraph.get(sid) } + // If the metadata for some stages have been removed, do not bother rendering this job + if (stageIds.size != graphs.size) { + Seq.empty + } else { + graphs + } } /** Return the graph metadata for the given stage, or None if no such information exists. */ From bb6dec3b160b54488892a509965fee70a530deff Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Wed, 13 May 2015 16:31:24 -0700 Subject: [PATCH 156/320] [STREAMING] [MINOR] Keep streaming.UIUtils private zsxwing Author: Andrew Or <andrew@databricks.com> Closes #6134 from andrewor14/private-streaming-uiutils and squashes the following commits: 225df94 [Andrew Or] Privatize class --- .../src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala index c206f973b2c66..f153ee105a18e 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala @@ -19,7 +19,7 @@ package org.apache.spark.streaming.ui import java.util.concurrent.TimeUnit -object UIUtils { +private[streaming] object UIUtils { /** * Return the short string for a `TimeUnit`. From 61d1e87c0d3d12dac0b724d1b84436f748227e99 Mon Sep 17 00:00:00 2001 From: Hari Shreedharan <hshreedharan@apache.org> Date: Wed, 13 May 2015 16:43:30 -0700 Subject: [PATCH 157/320] [SPARK-7356] [STREAMING] Fix flakey tests in FlumePollingStreamSuite using SparkSink's batch CountDownLatch. This is meant to make the FlumePollingStreamSuite deterministic. Now we basically count the number of batches that have been completed - and then verify the results rather than sleeping for random periods of time. Author: Hari Shreedharan <hshreedharan@apache.org> Closes #5918 from harishreedharan/flume-test-fix and squashes the following commits: 93f24f3 [Hari Shreedharan] Add an eventually block to ensure that all received data is processed. Refactor the dstream creation and remove redundant code. 1108804 [Hari Shreedharan] [SPARK-7356][STREAMING] Fix flakey tests in FlumePollingStreamSuite using SparkSink's batch CountDownLatch. 
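The diff below implements this by having each SparkSink count down a shared latch as batches are received, then verifying the collected events inside an `eventually` block once the latch is released. A minimal, generic sketch of the underlying wait-then-verify pattern, not the suite's actual code (the object and variable names are invented for illustration):

```scala
import java.util.concurrent.{CountDownLatch, TimeUnit}

// Generic "wait for N completed batches, then verify" pattern: a callback counts the
// latch down once per finished batch, the test blocks with a bounded await instead of
// sleeping for arbitrary periods, and assertions run only after everything has arrived.
object LatchedBatchVerification {
  def main(args: Array[String]): Unit = {
    val expectedBatches = 5
    val latch = new CountDownLatch(expectedBatches)

    // Stand-in for the system under test signalling batch completion.
    val producer = new Thread(new Runnable {
      override def run(): Unit = {
        (1 to expectedBatches).foreach { _ =>
          Thread.sleep(50)   // simulate work per batch
          latch.countDown()
        }
      }
    })
    producer.start()

    // Deterministic, bounded wait; returns false on timeout instead of hanging forever.
    val allArrived = latch.await(15, TimeUnit.SECONDS)
    assert(allArrived, "Timed out waiting for all batches to complete")
    // ...verification of the collected output would follow here...
    producer.join()
  }
}
```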
--- .../flume/FlumePollingStreamSuite.scala | 110 ++++++++---------- 1 file changed, 51 insertions(+), 59 deletions(-) diff --git a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala index 43c1b865b64a1..93afe50c2134f 100644 --- a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala +++ b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala @@ -18,15 +18,18 @@ package org.apache.spark.streaming.flume import java.net.InetSocketAddress -import java.util.concurrent.{Callable, ExecutorCompletionService, Executors} +import java.util.concurrent._ import scala.collection.JavaConversions._ import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer} +import scala.concurrent.duration._ +import scala.language.postfixOps import org.apache.flume.Context import org.apache.flume.channel.MemoryChannel import org.apache.flume.conf.Configurables import org.apache.flume.event.EventBuilder +import org.scalatest.concurrent.Eventually._ import org.scalatest.{BeforeAndAfter, FunSuite} @@ -57,11 +60,11 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging before(beforeFunction()) - ignore("flume polling test") { + test("flume polling test") { testMultipleTimes(testFlumePolling) } - ignore("flume polling test multiple hosts") { + test("flume polling test multiple hosts") { testMultipleTimes(testFlumePollingMultipleHost) } @@ -100,18 +103,8 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging Configurables.configure(sink, context) sink.setChannel(channel) sink.start() - // Set up the streaming context and input streams - val ssc = new StreamingContext(conf, batchDuration) - val flumeStream: ReceiverInputDStream[SparkFlumeEvent] = - FlumeUtils.createPollingStream(ssc, Seq(new InetSocketAddress("localhost", sink.getPort())), - StorageLevel.MEMORY_AND_DISK, eventsPerBatch, 1) - val outputBuffer = new ArrayBuffer[Seq[SparkFlumeEvent]] - with SynchronizedBuffer[Seq[SparkFlumeEvent]] - val outputStream = new TestOutputStream(flumeStream, outputBuffer) - outputStream.register() - ssc.start() - writeAndVerify(Seq(channel), ssc, outputBuffer) + writeAndVerify(Seq(sink), Seq(channel)) assertChannelIsEmpty(channel) sink.stop() channel.stop() @@ -142,10 +135,22 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging Configurables.configure(sink2, context) sink2.setChannel(channel2) sink2.start() + try { + writeAndVerify(Seq(sink, sink2), Seq(channel, channel2)) + assertChannelIsEmpty(channel) + assertChannelIsEmpty(channel2) + } finally { + sink.stop() + sink2.stop() + channel.stop() + channel2.stop() + } + } + def writeAndVerify(sinks: Seq[SparkSink], channels: Seq[MemoryChannel]) { // Set up the streaming context and input streams val ssc = new StreamingContext(conf, batchDuration) - val addresses = Seq(sink.getPort(), sink2.getPort()).map(new InetSocketAddress("localhost", _)) + val addresses = sinks.map(sink => new InetSocketAddress("localhost", sink.getPort())) val flumeStream: ReceiverInputDStream[SparkFlumeEvent] = FlumeUtils.createPollingStream(ssc, addresses, StorageLevel.MEMORY_AND_DISK, eventsPerBatch, 5) @@ -155,61 +160,49 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging outputStream.register() ssc.start() - try { - writeAndVerify(Seq(channel, channel2), ssc, outputBuffer) - 
assertChannelIsEmpty(channel) - assertChannelIsEmpty(channel2) - } finally { - sink.stop() - sink2.stop() - channel.stop() - channel2.stop() - } - } - - def writeAndVerify(channels: Seq[MemoryChannel], ssc: StreamingContext, - outputBuffer: ArrayBuffer[Seq[SparkFlumeEvent]]) { val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] val executor = Executors.newCachedThreadPool() val executorCompletion = new ExecutorCompletionService[Void](executor) - channels.map(channel => { + + val latch = new CountDownLatch(batchCount * channels.size) + sinks.foreach(_.countdownWhenBatchReceived(latch)) + + channels.foreach(channel => { executorCompletion.submit(new TxnSubmitter(channel, clock)) }) + for (i <- 0 until channels.size) { executorCompletion.take() } - val startTime = System.currentTimeMillis() - while (outputBuffer.size < batchCount * channels.size && - System.currentTimeMillis() - startTime < 15000) { - logInfo("output.size = " + outputBuffer.size) - Thread.sleep(100) - } - val timeTaken = System.currentTimeMillis() - startTime - assert(timeTaken < 15000, "Operation timed out after " + timeTaken + " ms") - logInfo("Stopping context") - ssc.stop() - val flattenedBuffer = outputBuffer.flatten - assert(flattenedBuffer.size === totalEventsPerChannel * channels.size) - var counter = 0 - for (k <- 0 until channels.size; i <- 0 until totalEventsPerChannel) { - val eventToVerify = EventBuilder.withBody((channels(k).getName + " - " + - String.valueOf(i)).getBytes("utf-8"), - Map[String, String]("test-" + i.toString -> "header")) - var found = false - var j = 0 - while (j < flattenedBuffer.size && !found) { - val strToCompare = new String(flattenedBuffer(j).event.getBody.array(), "utf-8") - if (new String(eventToVerify.getBody, "utf-8") == strToCompare && - eventToVerify.getHeaders.get("test-" + i.toString) - .equals(flattenedBuffer(j).event.getHeaders.get("test-" + i.toString))) { - found = true - counter += 1 + latch.await(15, TimeUnit.SECONDS) // Ensure all data has been received. + clock.advance(batchDuration.milliseconds) + + // The eventually is required to ensure that all data in the batch has been processed. 
+ eventually(timeout(10 seconds), interval(100 milliseconds)) { + val flattenedBuffer = outputBuffer.flatten + assert(flattenedBuffer.size === totalEventsPerChannel * channels.size) + var counter = 0 + for (k <- 0 until channels.size; i <- 0 until totalEventsPerChannel) { + val eventToVerify = EventBuilder.withBody((channels(k).getName + " - " + + String.valueOf(i)).getBytes("utf-8"), + Map[String, String]("test-" + i.toString -> "header")) + var found = false + var j = 0 + while (j < flattenedBuffer.size && !found) { + val strToCompare = new String(flattenedBuffer(j).event.getBody.array(), "utf-8") + if (new String(eventToVerify.getBody, "utf-8") == strToCompare && + eventToVerify.getHeaders.get("test-" + i.toString) + .equals(flattenedBuffer(j).event.getHeaders.get("test-" + i.toString))) { + found = true + counter += 1 + } + j += 1 + } + } + assert(counter === totalEventsPerChannel * channels.size) + } + ssc.stop() } def assertChannelIsEmpty(channel: MemoryChannel): Unit = { @@ -234,7 +227,6 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging tx.commit() tx.close() Thread.sleep(500) // Allow some time for the events to reach - clock.advance(batchDuration.milliseconds) } null } From 73bed408fbb47dfc28063afa3898c27fbdec7735 Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Wed, 13 May 2015 17:07:31 -0700 Subject: [PATCH 158/320] [SPARK-7081] Faster sort-based shuffle path using binary processing cache-aware sort This patch introduces a new shuffle manager that enhances the existing sort-based shuffle with a new cache-friendly sort algorithm that operates directly on binary data. The goals of this patch are to lower memory usage and Java object overheads during shuffle and to speed up sorting. It also lays groundwork for follow-up patches that will enable end-to-end processing of serialized records. The new shuffle manager, `UnsafeShuffleManager`, can be enabled by setting `spark.shuffle.manager=tungsten-sort` in SparkConf. The new shuffle manager uses directly-managed memory to implement several performance optimizations for certain types of shuffles. In cases where the new performance optimizations cannot be applied, the new shuffle manager delegates to SortShuffleManager to handle those shuffles. UnsafeShuffleManager's optimizations will apply when _all_ of the following conditions hold: - The shuffle dependency specifies no aggregation or output ordering. - The shuffle serializer supports relocation of serialized values (this is currently supported by KryoSerializer and Spark SQL's custom serializers). - The shuffle produces fewer than 16777216 output partitions. - No individual record is larger than 128 MB when serialized. In addition, extra spill-merging optimizations are automatically applied when the shuffle compression codec supports concatenation of serialized streams. This is currently supported by Spark's LZF compression codec. At a high level, UnsafeShuffleManager's design is similar to Spark's existing SortShuffleManager. In sort-based shuffle, incoming records are sorted according to their target partition ids, then written to a single map output file. Reducers fetch contiguous regions of this file in order to read their portion of the map output. In cases where the map output data is too large to fit in memory, sorted subsets of the output can be spilled to disk and those on-disk files are merged to produce the final output file.
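Since the new manager is opt-in via `spark.shuffle.manager`, a minimal sketch of enabling it from a driver program, assuming Kryo is used so that the serializer-relocation condition above is met (the application name is an illustrative placeholder, not part of the patch):

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Sketch of opting into the new shuffle path described above. Kryo is configured because
// the conditions above require a serializer that supports relocation of serialized values;
// when the optimizations cannot be applied, the manager delegates to SortShuffleManager.
val conf = new SparkConf()
  .setAppName("tungsten-sort-sketch")   // hypothetical app name
  .set("spark.shuffle.manager", "tungsten-sort")
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
val sc = new SparkContext(conf)
```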
UnsafeShuffleManager optimizes this process in several ways: - Its sort operates on serialized binary data rather than Java objects, which reduces memory consumption and GC overheads. This optimization requires the record serializer to have certain properties to allow serialized records to be re-ordered without requiring deserialization. See SPARK-4550, where this optimization was first proposed and implemented, for more details. - It uses a specialized cache-efficient sorter (UnsafeShuffleExternalSorter) that sorts arrays of compressed record pointers and partition ids. By using only 8 bytes of space per record in the sorting array, this fits more of the array into cache. - The spill merging procedure operates on blocks of serialized records that belong to the same partition and does not need to deserialize records during the merge. - When the spill compression codec supports concatenation of compressed data, the spill merge simply concatenates the serialized and compressed spill partitions to produce the final output partition. This allows efficient data copying methods, like NIO's `transferTo`, to be used and avoids the need to allocate decompression or copying buffers during the merge. The shuffle read path is unchanged. This patch is similar to [SPARK-4550](http://issues.apache.org/jira/browse/SPARK-4550) / #4450 but uses a slightly different implementation. The `unsafe`-based implementation featured in this patch lays the groundwork for followup patches that will enable sorting to operate on serialized data pages that will be prepared by Spark SQL's new `unsafe` operators (such as the new aggregation operator introduced in #5725). ### Future work There are several tasks that build upon this patch, which will be left to future work: - [SPARK-7271](https://issues.apache.org/jira/browse/SPARK-7271) Redesign / extend the shuffle interfaces to accept binary data as input. The goal here is to let us bypass serialization steps in cases where the sort input is produced by an operator that operates directly on binary data. - Extension / redesign of the `Serializer` API. We can add new methods which allow serializers to determine the size requirements for serializing objects and for serializing objects directly to a specified memory address (similar to how `UnsafeRowConverter` works in Spark SQL). <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/5868) <!-- Reviewable:end --> Author: Josh Rosen <joshrosen@databricks.com> Closes #5868 from JoshRosen/unsafe-sort and squashes the following commits: ef0a86e [Josh Rosen] Fix scalastyle errors 7610f2f [Josh Rosen] Add tests for proper cleanup of shuffle data. d494ffe [Josh Rosen] Fix deserialization of JavaSerializer instances. 52a9981 [Josh Rosen] Fix some bugs in the address packing code. 51812a7 [Josh Rosen] Change shuffle manager sort name to tungsten-sort 4023fa4 [Josh Rosen] Add @Private annotation to some Java classes. de40b9d [Josh Rosen] More comments to try to explain metrics code df07699 [Josh Rosen] Attempt to clarify confusing metrics update code 5e189c6 [Josh Rosen] Track time spend closing / flushing files; split TimeTrackingOutputStream into separate file. 
d5779c6 [Josh Rosen] Merge remote-tracking branch 'origin/master' into unsafe-sort c2ce78e [Josh Rosen] Fix a missed usage of MAX_PARTITION_ID e3b8855 [Josh Rosen] Cleanup in UnsafeShuffleWriter 4a2c785 [Josh Rosen] rename 'sort buffer' to 'pointer array' 6276168 [Josh Rosen] Remove ability to disable spilling in UnsafeShuffleExternalSorter. 57312c9 [Josh Rosen] Clarify fileBufferSize units 2d4e4f4 [Josh Rosen] Address some minor comments in UnsafeShuffleExternalSorter. fdcac08 [Josh Rosen] Guard against overflow when expanding sort buffer. 85da63f [Josh Rosen] Cleanup in UnsafeShuffleSorterIterator. 0ad34da [Josh Rosen] Fix off-by-one in nextInt() call 56781a1 [Josh Rosen] Rename UnsafeShuffleSorter to UnsafeShuffleInMemorySorter e995d1a [Josh Rosen] Introduce MAX_SHUFFLE_OUTPUT_PARTITIONS. e58a6b4 [Josh Rosen] Add more tests for PackedRecordPointer encoding. 4f0b770 [Josh Rosen] Attempt to implement proper shuffle write metrics. d4e6d89 [Josh Rosen] Update to bit shifting constants 69d5899 [Josh Rosen] Remove some unnecessary override vals 8531286 [Josh Rosen] Add tests that automatically trigger spills. 7c953f9 [Josh Rosen] Add test that covers UnsafeShuffleSortDataFormat.swap(). e1855e5 [Josh Rosen] Fix a handful of misc. IntelliJ inspections 39434f9 [Josh Rosen] Avoid integer multiplication overflow in getMemoryUsage (thanks FindBugs!) 1e3ad52 [Josh Rosen] Delete unused ByteBufferOutputStream class. ea4f85f [Josh Rosen] Roll back an unnecessary change in Spillable. ae538dc [Josh Rosen] Document UnsafeShuffleManager. ec6d626 [Josh Rosen] Add notes on maximum # of supported shuffle partitions. 0d4d199 [Josh Rosen] Bump up shuffle.memoryFraction to make tests pass. b3b1924 [Josh Rosen] Properly implement close() and flush() in DummySerializerInstance. 1ef56c7 [Josh Rosen] Revise compression codec support in merger; test cross product of configurations. b57c17f [Josh Rosen] Disable some overly-verbose logs that rendered DEBUG useless. f780fb1 [Josh Rosen] Add test demonstrating which compression codecs support concatenation. 4a01c45 [Josh Rosen] Remove unnecessary log message 27b18b0 [Josh Rosen] That for inserting records AT the max record size. fcd9a3c [Josh Rosen] Add notes + tests for maximum record / page sizes. 9d1ee7c [Josh Rosen] Fix MiMa excludes for ShuffleWriter change fd4bb9e [Josh Rosen] Use own ByteBufferOutputStream rather than Kryo's 67d25ba [Josh Rosen] Update Exchange operator's copying logic to account for new shuffle manager 8f5061a [Josh Rosen] Strengthen assertion to check partitioning 01afc74 [Josh Rosen] Actually read data in UnsafeShuffleWriterSuite 1929a74 [Josh Rosen] Update to reflect upstream ShuffleBlockManager -> ShuffleBlockResolver rename. e8718dd [Josh Rosen] Merge remote-tracking branch 'origin/master' into unsafe-sort 9b7ebed [Josh Rosen] More defensive programming RE: cleaning up spill files and memory after errors 7cd013b [Josh Rosen] Begin refactoring to enable proper tests for spilling. 722849b [Josh Rosen] Add workaround for transferTo() bug in merging code; refactor tests. 9883e30 [Josh Rosen] Merge remote-tracking branch 'origin/master' into unsafe-sort b95e642 [Josh Rosen] Refactor and document logic that decides when to spill. 1ce1300 [Josh Rosen] More minor cleanup 5e8cf75 [Josh Rosen] More minor cleanup e67f1ea [Josh Rosen] Remove upper type bound in ShuffleWriter interface. 
cfe0ec4 [Josh Rosen] Address a number of minor review comments: 8a6fe52 [Josh Rosen] Rename UnsafeShuffleSpillWriter to UnsafeShuffleExternalSorter 11feeb6 [Josh Rosen] Update TODOs related to shuffle write metrics. b674412 [Josh Rosen] Merge remote-tracking branch 'origin/master' into unsafe-sort aaea17b [Josh Rosen] Add comments to UnsafeShuffleSpillWriter. 4f70141 [Josh Rosen] Fix merging; now passes UnsafeShuffleSuite tests. 133c8c9 [Josh Rosen] WIP towards testing UnsafeShuffleWriter. f480fb2 [Josh Rosen] WIP in mega-refactoring towards shuffle-specific sort. 57f1ec0 [Josh Rosen] WIP towards packed record pointers for use in optimized shuffle sort. 69232fd [Josh Rosen] Enable compressible address encoding for off-heap mode. 7ee918e [Josh Rosen] Re-order imports in tests 3aeaff7 [Josh Rosen] More refactoring and cleanup; begin cleaning iterator interfaces 3490512 [Josh Rosen] Misc. cleanup f156a8f [Josh Rosen] Hacky metrics integration; refactor some interfaces. 2776aca [Josh Rosen] First passing test for ExternalSorter. 5e100b2 [Josh Rosen] Super-messy WIP on external sort 595923a [Josh Rosen] Remove some unused variables. 8958584 [Josh Rosen] Fix bug in calculating free space in current page. f17fa8f [Josh Rosen] Add missing newline c2fca17 [Josh Rosen] Small refactoring of SerializerPropertiesSuite to enable test re-use: b8a09fe [Josh Rosen] Back out accidental log4j.properties change bfc12d3 [Josh Rosen] Add tests for serializer relocation property. 240864c [Josh Rosen] Remove PrefixComputer and require prefix to be specified as part of insert() 1433b42 [Josh Rosen] Store record length as int instead of long. 026b497 [Josh Rosen] Re-use a buffer in UnsafeShuffleWriter 0748458 [Josh Rosen] Port UnsafeShuffleWriter to Java. 87e721b [Josh Rosen] Renaming and comments d3cc310 [Josh Rosen] Flag that SparkSqlSerializer2 supports relocation e2d96ca [Josh Rosen] Expand serializer API and use new function to help control when new UnsafeShuffle path is used. e267cee [Josh Rosen] Fix compilation of UnsafeSorterSuite 9c6cf58 [Josh Rosen] Refactor to use DiskBlockObjectWriter. 253f13e [Josh Rosen] More cleanup 8e3ec20 [Josh Rosen] Begin code cleanup. 4d2f5e1 [Josh Rosen] WIP 3db12de [Josh Rosen] Minor simplification and sanity checks in UnsafeSorter 767d3ca [Josh Rosen] Fix invalid range in UnsafeSorter. e900152 [Josh Rosen] Add test for empty iterator in UnsafeSorter 57a4ea0 [Josh Rosen] Make initialSize configurable in UnsafeSorter abf7bfe [Josh Rosen] Add basic test case. 
81d52c5 [Josh Rosen] WIP on UnsafeSorter --- core/pom.xml | 10 + .../unsafe/DummySerializerInstance.java | 93 ++++ .../shuffle/unsafe/PackedRecordPointer.java | 92 +++ .../spark/shuffle/unsafe/SpillInfo.java | 37 ++ .../unsafe/UnsafeShuffleExternalSorter.java | 422 ++++++++++++++ .../unsafe/UnsafeShuffleInMemorySorter.java | 124 +++++ .../unsafe/UnsafeShuffleSortDataFormat.java | 67 +++ .../shuffle/unsafe/UnsafeShuffleWriter.java | 438 +++++++++++++++ .../storage/TimeTrackingOutputStream.java | 75 +++ .../scala/org/apache/spark/SparkEnv.scala | 3 +- .../spark/serializer/JavaSerializer.scala | 2 + .../shuffle/FileShuffleBlockResolver.scala | 2 +- .../apache/spark/shuffle/ShuffleWriter.scala | 7 +- .../shuffle/hash/HashShuffleWriter.scala | 2 +- .../shuffle/sort/SortShuffleManager.scala | 2 +- .../shuffle/sort/SortShuffleWriter.scala | 2 +- .../shuffle/unsafe/UnsafeShuffleManager.scala | 205 +++++++ .../spark/storage/BlockObjectWriter.scala | 24 +- .../collection/ExternalAppendOnlyMap.scala | 2 +- .../util/collection/ExternalSorter.scala | 2 +- .../unsafe/PackedRecordPointerSuite.java | 101 ++++ .../UnsafeShuffleInMemorySorterSuite.java | 132 +++++ .../unsafe/UnsafeShuffleWriterSuite.java | 527 ++++++++++++++++++ .../spark/io/CompressionCodecSuite.scala | 44 ++ .../serializer/JavaSerializerSuite.scala | 29 + .../unsafe/UnsafeShuffleManagerSuite.scala | 128 +++++ .../shuffle/unsafe/UnsafeShuffleSuite.scala | 105 ++++ pom.xml | 14 +- project/MimaExcludes.scala | 6 + .../apache/spark/sql/execution/Exchange.scala | 28 +- unsafe/pom.xml | 4 + .../unsafe/memory/TaskMemoryManager.java | 79 ++- .../unsafe/memory/TaskMemoryManagerSuite.java | 23 + 33 files changed, 2767 insertions(+), 64 deletions(-) create mode 100644 core/src/main/java/org/apache/spark/shuffle/unsafe/DummySerializerInstance.java create mode 100644 core/src/main/java/org/apache/spark/shuffle/unsafe/PackedRecordPointer.java create mode 100644 core/src/main/java/org/apache/spark/shuffle/unsafe/SpillInfo.java create mode 100644 core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleExternalSorter.java create mode 100644 core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorter.java create mode 100644 core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleSortDataFormat.java create mode 100644 core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriter.java create mode 100644 core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java create mode 100644 core/src/main/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManager.scala create mode 100644 core/src/test/java/org/apache/spark/shuffle/unsafe/PackedRecordPointerSuite.java create mode 100644 core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorterSuite.java create mode 100644 core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java create mode 100644 core/src/test/scala/org/apache/spark/serializer/JavaSerializerSuite.scala create mode 100644 core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManagerSuite.scala create mode 100644 core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleSuite.scala diff --git a/core/pom.xml b/core/pom.xml index 262a3320db106..bfa49d0d6dc25 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -361,6 +361,16 @@ <artifactId>junit</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>org.hamcrest</groupId> + <artifactId>hamcrest-core</artifactId> + <scope>test</scope> + </dependency> + <dependency> + 
<groupId>org.hamcrest</groupId> + <artifactId>hamcrest-library</artifactId> + <scope>test</scope> + </dependency> <dependency> <groupId>com.novocode</groupId> <artifactId>junit-interface</artifactId> diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/DummySerializerInstance.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/DummySerializerInstance.java new file mode 100644 index 0000000000000..3f746b886bc9b --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/DummySerializerInstance.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +import scala.reflect.ClassTag; + +import org.apache.spark.serializer.DeserializationStream; +import org.apache.spark.serializer.SerializationStream; +import org.apache.spark.serializer.SerializerInstance; +import org.apache.spark.unsafe.PlatformDependent; + +/** + * Unfortunately, we need a serializer instance in order to construct a DiskBlockObjectWriter. + * Our shuffle write path doesn't actually use this serializer (since we end up calling the + * `write() OutputStream methods), but DiskBlockObjectWriter still calls some methods on it. To work + * around this, we pass a dummy no-op serializer. 
+ */ +final class DummySerializerInstance extends SerializerInstance { + + public static final DummySerializerInstance INSTANCE = new DummySerializerInstance(); + + private DummySerializerInstance() { } + + @Override + public SerializationStream serializeStream(final OutputStream s) { + return new SerializationStream() { + @Override + public void flush() { + // Need to implement this because DiskObjectWriter uses it to flush the compression stream + try { + s.flush(); + } catch (IOException e) { + PlatformDependent.throwException(e); + } + } + + @Override + public <T> SerializationStream writeObject(T t, ClassTag<T> ev1) { + throw new UnsupportedOperationException(); + } + + @Override + public void close() { + // Need to implement this because DiskObjectWriter uses it to close the compression stream + try { + s.close(); + } catch (IOException e) { + PlatformDependent.throwException(e); + } + } + }; + } + + @Override + public <T> ByteBuffer serialize(T t, ClassTag<T> ev1) { + throw new UnsupportedOperationException(); + } + + @Override + public DeserializationStream deserializeStream(InputStream s) { + throw new UnsupportedOperationException(); + } + + @Override + public <T> T deserialize(ByteBuffer bytes, ClassLoader loader, ClassTag<T> ev1) { + throw new UnsupportedOperationException(); + } + + @Override + public <T> T deserialize(ByteBuffer bytes, ClassTag<T> ev1) { + throw new UnsupportedOperationException(); + } +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/PackedRecordPointer.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/PackedRecordPointer.java new file mode 100644 index 0000000000000..4ee6a82c0423e --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/PackedRecordPointer.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +/** + * Wrapper around an 8-byte word that holds a 24-bit partition number and 40-bit record pointer. + * <p> + * Within the long, the data is laid out as follows: + * <pre> + * [24 bit partition number][13 bit memory page number][27 bit offset in page] + * </pre> + * This implies that the maximum addressable page size is 2^27 bits = 128 megabytes, assuming that + * our offsets in pages are not 8-byte-word-aligned. Since we have 2^13 pages (based off the + * 13-bit page numbers assigned by {@link org.apache.spark.unsafe.memory.TaskMemoryManager}), this + * implies that we can address 2^13 * 128 megabytes = 1 terabyte of RAM per task. + * <p> + * Assuming word-alignment would allow for a 1 gigabyte maximum page size, but we leave this + * optimization to future work as it will require more careful design to ensure that addresses are + * properly aligned (e.g. by padding records). 
+ */ +final class PackedRecordPointer { + + static final int MAXIMUM_PAGE_SIZE_BYTES = 1 << 27; // 128 megabytes + + /** + * The maximum partition identifier that can be encoded. Note that partition ids start from 0. + */ + static final int MAXIMUM_PARTITION_ID = (1 << 24) - 1; // 16777215 + + /** Bit mask for the lower 40 bits of a long. */ + private static final long MASK_LONG_LOWER_40_BITS = (1L << 40) - 1; + + /** Bit mask for the upper 24 bits of a long */ + private static final long MASK_LONG_UPPER_24_BITS = ~MASK_LONG_LOWER_40_BITS; + + /** Bit mask for the lower 27 bits of a long. */ + private static final long MASK_LONG_LOWER_27_BITS = (1L << 27) - 1; + + /** Bit mask for the lower 51 bits of a long. */ + private static final long MASK_LONG_LOWER_51_BITS = (1L << 51) - 1; + + /** Bit mask for the upper 13 bits of a long */ + private static final long MASK_LONG_UPPER_13_BITS = ~MASK_LONG_LOWER_51_BITS; + + /** + * Pack a record address and partition id into a single word. + * + * @param recordPointer a record pointer encoded by TaskMemoryManager. + * @param partitionId a shuffle partition id (maximum value of 2^24). + * @return a packed pointer that can be decoded using the {@link PackedRecordPointer} class. + */ + public static long packPointer(long recordPointer, int partitionId) { + assert (partitionId <= MAXIMUM_PARTITION_ID); + // Note that without word alignment we can address 2^27 bytes = 128 megabytes per page. + // Also note that this relies on some internals of how TaskMemoryManager encodes its addresses. + final long pageNumber = (recordPointer & MASK_LONG_UPPER_13_BITS) >>> 24; + final long compressedAddress = pageNumber | (recordPointer & MASK_LONG_LOWER_27_BITS); + return (((long) partitionId) << 40) | compressedAddress; + } + + private long packedRecordPointer; + + public void set(long packedRecordPointer) { + this.packedRecordPointer = packedRecordPointer; + } + + public int getPartitionId() { + return (int) ((packedRecordPointer & MASK_LONG_UPPER_24_BITS) >>> 40); + } + + public long getRecordPointer() { + final long pageNumber = (packedRecordPointer << 24) & MASK_LONG_UPPER_13_BITS; + final long offsetInPage = packedRecordPointer & MASK_LONG_LOWER_27_BITS; + return pageNumber | offsetInPage; + } + +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/SpillInfo.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/SpillInfo.java new file mode 100644 index 0000000000000..7bac0dc0bbeb6 --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/SpillInfo.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.shuffle.unsafe; + +import java.io.File; + +import org.apache.spark.storage.TempShuffleBlockId; + +/** + * Metadata for a block of data written by {@link UnsafeShuffleExternalSorter}. + */ +final class SpillInfo { + final long[] partitionLengths; + final File file; + final TempShuffleBlockId blockId; + + public SpillInfo(int numPartitions, File file, TempShuffleBlockId blockId) { + this.partitionLengths = new long[numPartitions]; + this.file = file; + this.blockId = blockId; + } +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleExternalSorter.java new file mode 100644 index 0000000000000..9e9ed94b7890c --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleExternalSorter.java @@ -0,0 +1,422 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.io.File; +import java.io.IOException; +import java.util.LinkedList; + +import scala.Tuple2; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.spark.SparkConf; +import org.apache.spark.TaskContext; +import org.apache.spark.executor.ShuffleWriteMetrics; +import org.apache.spark.serializer.SerializerInstance; +import org.apache.spark.shuffle.ShuffleMemoryManager; +import org.apache.spark.storage.*; +import org.apache.spark.unsafe.PlatformDependent; +import org.apache.spark.unsafe.memory.MemoryBlock; +import org.apache.spark.unsafe.memory.TaskMemoryManager; +import org.apache.spark.util.Utils; + +/** + * An external sorter that is specialized for sort-based shuffle. + * <p> + * Incoming records are appended to data pages. When all records have been inserted (or when the + * current thread's shuffle memory limit is reached), the in-memory records are sorted according to + * their partition ids (using a {@link UnsafeShuffleInMemorySorter}). The sorted records are then + * written to a single output file (or multiple files, if we've spilled). The format of the output + * files is the same as the format of the final output file written by + * {@link org.apache.spark.shuffle.sort.SortShuffleWriter}: each output partition's records are + * written as a single serialized, compressed stream that can be read with a new decompression and + * deserialization stream. + * <p> + * Unlike {@link org.apache.spark.util.collection.ExternalSorter}, this sorter does not merge its + * spill files. Instead, this merging is performed in {@link UnsafeShuffleWriter}, which uses a + * specialized merge procedure that avoids extra serialization/deserialization. 
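+ * <p>
+ * A rough lifecycle sketch (the record bytes and partition id are invented for the example;
+ * the method names are the ones defined in this class):
+ * <pre>
+ *   final byte[] record = new byte[] { 1, 2, 3, 4 };
+ *   sorter.insertRecord(record, PlatformDependent.BYTE_ARRAY_OFFSET, record.length, 0);
+ *   // ... more inserts; a spill() happens automatically if memory cannot be acquired ...
+ *   final SpillInfo[] spills = sorter.closeAndGetSpills();  // sorts and writes remaining records
+ * </pre>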
+ */ +final class UnsafeShuffleExternalSorter { + + private final Logger logger = LoggerFactory.getLogger(UnsafeShuffleExternalSorter.class); + + private static final int PAGE_SIZE = PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES; + @VisibleForTesting + static final int DISK_WRITE_BUFFER_SIZE = 1024 * 1024; + @VisibleForTesting + static final int MAX_RECORD_SIZE = PAGE_SIZE - 4; + + private final int initialSize; + private final int numPartitions; + private final TaskMemoryManager memoryManager; + private final ShuffleMemoryManager shuffleMemoryManager; + private final BlockManager blockManager; + private final TaskContext taskContext; + private final ShuffleWriteMetrics writeMetrics; + + /** The buffer size to use when writing spills using DiskBlockObjectWriter */ + private final int fileBufferSizeBytes; + + /** + * Memory pages that hold the records being sorted. The pages in this list are freed when + * spilling, although in principle we could recycle these pages across spills (on the other hand, + * this might not be necessary if we maintained a pool of re-usable pages in the TaskMemoryManager + * itself). + */ + private final LinkedList<MemoryBlock> allocatedPages = new LinkedList<MemoryBlock>(); + + private final LinkedList<SpillInfo> spills = new LinkedList<SpillInfo>(); + + // These variables are reset after spilling: + private UnsafeShuffleInMemorySorter sorter; + private MemoryBlock currentPage = null; + private long currentPagePosition = -1; + private long freeSpaceInCurrentPage = 0; + + public UnsafeShuffleExternalSorter( + TaskMemoryManager memoryManager, + ShuffleMemoryManager shuffleMemoryManager, + BlockManager blockManager, + TaskContext taskContext, + int initialSize, + int numPartitions, + SparkConf conf, + ShuffleWriteMetrics writeMetrics) throws IOException { + this.memoryManager = memoryManager; + this.shuffleMemoryManager = shuffleMemoryManager; + this.blockManager = blockManager; + this.taskContext = taskContext; + this.initialSize = initialSize; + this.numPartitions = numPartitions; + // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided + this.fileBufferSizeBytes = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; + + this.writeMetrics = writeMetrics; + initializeForWriting(); + } + + /** + * Allocates new sort data structures. Called when creating the sorter and after each spill. + */ + private void initializeForWriting() throws IOException { + // TODO: move this sizing calculation logic into a static method of sorter: + final long memoryRequested = initialSize * 8L; + final long memoryAcquired = shuffleMemoryManager.tryToAcquire(memoryRequested); + if (memoryAcquired != memoryRequested) { + shuffleMemoryManager.release(memoryAcquired); + throw new IOException("Could not acquire " + memoryRequested + " bytes of memory"); + } + + this.sorter = new UnsafeShuffleInMemorySorter(initialSize); + } + + /** + * Sorts the in-memory records and writes the sorted records to an on-disk file. + * This method does not free the sort data structures. + * + * @param isLastFile if true, this indicates that we're writing the final output file and that the + * bytes written should be counted towards shuffle spill metrics rather than + * shuffle write metrics. + */ + private void writeSortedFile(boolean isLastFile) throws IOException { + + final ShuffleWriteMetrics writeMetricsToUse; + + if (isLastFile) { + // We're writing the final non-spill file, so we _do_ want to count this as shuffle bytes. 
+ writeMetricsToUse = writeMetrics; + } else { + // We're spilling, so bytes written should be counted towards spill rather than write. + // Create a dummy WriteMetrics object to absorb these metrics, since we don't want to count + // them towards shuffle bytes written. + writeMetricsToUse = new ShuffleWriteMetrics(); + } + + // This call performs the actual sort. + final UnsafeShuffleInMemorySorter.UnsafeShuffleSorterIterator sortedRecords = + sorter.getSortedIterator(); + + // Currently, we need to open a new DiskBlockObjectWriter for each partition; we can avoid this + // after SPARK-5581 is fixed. + BlockObjectWriter writer; + + // Small writes to DiskBlockObjectWriter will be fairly inefficient. Since there doesn't seem to + // be an API to directly transfer bytes from managed memory to the disk writer, we buffer + // data through a byte array. This array does not need to be large enough to hold a single + // record; + final byte[] writeBuffer = new byte[DISK_WRITE_BUFFER_SIZE]; + + // Because this output will be read during shuffle, its compression codec must be controlled by + // spark.shuffle.compress instead of spark.shuffle.spill.compress, so we need to use + // createTempShuffleBlock here; see SPARK-3426 for more details. + final Tuple2<TempShuffleBlockId, File> spilledFileInfo = + blockManager.diskBlockManager().createTempShuffleBlock(); + final File file = spilledFileInfo._2(); + final TempShuffleBlockId blockId = spilledFileInfo._1(); + final SpillInfo spillInfo = new SpillInfo(numPartitions, file, blockId); + + // Unfortunately, we need a serializer instance in order to construct a DiskBlockObjectWriter. + // Our write path doesn't actually use this serializer (since we end up calling the `write()` + // OutputStream methods), but DiskBlockObjectWriter still calls some methods on it. To work + // around this, we pass a dummy no-op serializer. 
+ final SerializerInstance ser = DummySerializerInstance.INSTANCE; + + writer = blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse); + + int currentPartition = -1; + while (sortedRecords.hasNext()) { + sortedRecords.loadNext(); + final int partition = sortedRecords.packedRecordPointer.getPartitionId(); + assert (partition >= currentPartition); + if (partition != currentPartition) { + // Switch to the new partition + if (currentPartition != -1) { + writer.commitAndClose(); + spillInfo.partitionLengths[currentPartition] = writer.fileSegment().length(); + } + currentPartition = partition; + writer = + blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse); + } + + final long recordPointer = sortedRecords.packedRecordPointer.getRecordPointer(); + final Object recordPage = memoryManager.getPage(recordPointer); + final long recordOffsetInPage = memoryManager.getOffsetInPage(recordPointer); + int dataRemaining = PlatformDependent.UNSAFE.getInt(recordPage, recordOffsetInPage); + long recordReadPosition = recordOffsetInPage + 4; // skip over record length + while (dataRemaining > 0) { + final int toTransfer = Math.min(DISK_WRITE_BUFFER_SIZE, dataRemaining); + PlatformDependent.copyMemory( + recordPage, + recordReadPosition, + writeBuffer, + PlatformDependent.BYTE_ARRAY_OFFSET, + toTransfer); + writer.write(writeBuffer, 0, toTransfer); + recordReadPosition += toTransfer; + dataRemaining -= toTransfer; + } + writer.recordWritten(); + } + + if (writer != null) { + writer.commitAndClose(); + // If `writeSortedFile()` was called from `closeAndGetSpills()` and no records were inserted, + // then the file might be empty. Note that it might be better to avoid calling + // writeSortedFile() in that case. + if (currentPartition != -1) { + spillInfo.partitionLengths[currentPartition] = writer.fileSegment().length(); + spills.add(spillInfo); + } + } + + if (!isLastFile) { // i.e. this is a spill file + // The current semantics of `shuffleRecordsWritten` seem to be that it's updated when records + // are written to disk, not when they enter the shuffle sorting code. DiskBlockObjectWriter + // relies on its `recordWritten()` method being called in order to trigger periodic updates to + // `shuffleBytesWritten`. If we were to remove the `recordWritten()` call and increment that + // counter at a higher-level, then the in-progress metrics for records written and bytes + // written would get out of sync. + // + // When writing the last file, we pass `writeMetrics` directly to the DiskBlockObjectWriter; + // in all other cases, we pass in a dummy write metrics to capture metrics, then copy those + // metrics to the true write metrics here. The reason for performing this copying is so that + // we can avoid reporting spilled bytes as shuffle write bytes. + // + // Note that we intentionally ignore the value of `writeMetricsToUse.shuffleWriteTime()`. + // Consistent with ExternalSorter, we do not count this IO towards shuffle write time. + // This means that this IO time is not accounted for anywhere; SPARK-3577 will fix this. + writeMetrics.incShuffleRecordsWritten(writeMetricsToUse.shuffleRecordsWritten()); + taskContext.taskMetrics().incDiskBytesSpilled(writeMetricsToUse.shuffleBytesWritten()); + } + } + + /** + * Sort and spill the current records in response to memory pressure. 
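+ * This writes the sorted records to a spill file via {@code writeSortedFile(false)}, releases the
+ * in-memory sorter and its data pages back to the {@link ShuffleMemoryManager}, records the freed
+ * bytes as memory bytes spilled, and then re-initializes an empty sorter for further inserts.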
+ */ + @VisibleForTesting + void spill() throws IOException { + logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", + Thread.currentThread().getId(), + Utils.bytesToString(getMemoryUsage()), + spills.size(), + spills.size() > 1 ? " times" : " time"); + + writeSortedFile(false); + final long sorterMemoryUsage = sorter.getMemoryUsage(); + sorter = null; + shuffleMemoryManager.release(sorterMemoryUsage); + final long spillSize = freeMemory(); + taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); + + initializeForWriting(); + } + + private long getMemoryUsage() { + return sorter.getMemoryUsage() + (allocatedPages.size() * (long) PAGE_SIZE); + } + + private long freeMemory() { + long memoryFreed = 0; + for (MemoryBlock block : allocatedPages) { + memoryManager.freePage(block); + shuffleMemoryManager.release(block.size()); + memoryFreed += block.size(); + } + allocatedPages.clear(); + currentPage = null; + currentPagePosition = -1; + freeSpaceInCurrentPage = 0; + return memoryFreed; + } + + /** + * Force all memory and spill files to be deleted; called by shuffle error-handling code. + */ + public void cleanupAfterError() { + freeMemory(); + for (SpillInfo spill : spills) { + if (spill.file.exists() && !spill.file.delete()) { + logger.error("Unable to delete spill file {}", spill.file.getPath()); + } + } + if (sorter != null) { + shuffleMemoryManager.release(sorter.getMemoryUsage()); + sorter = null; + } + } + + /** + * Checks whether there is enough space to insert a new record into the sorter. + * + * @param requiredSpace the required space in the data page, in bytes, including space for storing + * the record size. + + * @return true if the record can be inserted without requiring more allocations, false otherwise. + */ + private boolean haveSpaceForRecord(int requiredSpace) { + assert (requiredSpace > 0); + return (sorter.hasSpaceForAnotherRecord() && (requiredSpace <= freeSpaceInCurrentPage)); + } + + /** + * Allocates more memory in order to insert an additional record. This will request additional + * memory from the {@link ShuffleMemoryManager} and spill if the requested memory can not be + * obtained. + * + * @param requiredSpace the required space in the data page, in bytes, including space for storing + * the record size. + */ + private void allocateSpaceForRecord(int requiredSpace) throws IOException { + if (!sorter.hasSpaceForAnotherRecord()) { + logger.debug("Attempting to expand sort pointer array"); + final long oldPointerArrayMemoryUsage = sorter.getMemoryUsage(); + final long memoryToGrowPointerArray = oldPointerArrayMemoryUsage * 2; + final long memoryAcquired = shuffleMemoryManager.tryToAcquire(memoryToGrowPointerArray); + if (memoryAcquired < memoryToGrowPointerArray) { + shuffleMemoryManager.release(memoryAcquired); + spill(); + } else { + sorter.expandPointerArray(); + shuffleMemoryManager.release(oldPointerArrayMemoryUsage); + } + } + if (requiredSpace > freeSpaceInCurrentPage) { + logger.trace("Required space {} is less than free space in current page ({})", requiredSpace, + freeSpaceInCurrentPage); + // TODO: we should track metrics on the amount of space wasted when we roll over to a new page + // without using the free space at the end of the current page. We should also do this for + // BytesToBytesMap. 
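+ // Page-allocation policy implemented below: reject any record larger than a full page,
+ // otherwise try to acquire a fresh page from the ShuffleMemoryManager, spilling once and
+ // retrying the acquisition if the first attempt comes up short.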
+ if (requiredSpace > PAGE_SIZE) { + throw new IOException("Required space " + requiredSpace + " is greater than page size (" + + PAGE_SIZE + ")"); + } else { + final long memoryAcquired = shuffleMemoryManager.tryToAcquire(PAGE_SIZE); + if (memoryAcquired < PAGE_SIZE) { + shuffleMemoryManager.release(memoryAcquired); + spill(); + final long memoryAcquiredAfterSpilling = shuffleMemoryManager.tryToAcquire(PAGE_SIZE); + if (memoryAcquiredAfterSpilling != PAGE_SIZE) { + shuffleMemoryManager.release(memoryAcquiredAfterSpilling); + throw new IOException("Unable to acquire " + PAGE_SIZE + " bytes of memory"); + } + } + currentPage = memoryManager.allocatePage(PAGE_SIZE); + currentPagePosition = currentPage.getBaseOffset(); + freeSpaceInCurrentPage = PAGE_SIZE; + allocatedPages.add(currentPage); + } + } + } + + /** + * Write a record to the shuffle sorter. + */ + public void insertRecord( + Object recordBaseObject, + long recordBaseOffset, + int lengthInBytes, + int partitionId) throws IOException { + // Need 4 bytes to store the record length. + final int totalSpaceRequired = lengthInBytes + 4; + if (!haveSpaceForRecord(totalSpaceRequired)) { + allocateSpaceForRecord(totalSpaceRequired); + } + + final long recordAddress = + memoryManager.encodePageNumberAndOffset(currentPage, currentPagePosition); + final Object dataPageBaseObject = currentPage.getBaseObject(); + PlatformDependent.UNSAFE.putInt(dataPageBaseObject, currentPagePosition, lengthInBytes); + currentPagePosition += 4; + freeSpaceInCurrentPage -= 4; + PlatformDependent.copyMemory( + recordBaseObject, + recordBaseOffset, + dataPageBaseObject, + currentPagePosition, + lengthInBytes); + currentPagePosition += lengthInBytes; + freeSpaceInCurrentPage -= lengthInBytes; + sorter.insertRecord(recordAddress, partitionId); + } + + /** + * Close the sorter, causing any buffered data to be sorted and written out to disk. + * + * @return metadata for the spill files written by this sorter. If no records were ever inserted + * into this sorter, then this will return an empty array. + * @throws IOException + */ + public SpillInfo[] closeAndGetSpills() throws IOException { + try { + if (sorter != null) { + // Do not count the final file towards the spill count. + writeSortedFile(true); + freeMemory(); + } + return spills.toArray(new SpillInfo[spills.size()]); + } catch (IOException e) { + cleanupAfterError(); + throw e; + } + } + +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorter.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorter.java new file mode 100644 index 0000000000000..5bab501da9364 --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorter.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.util.Comparator; + +import org.apache.spark.util.collection.Sorter; + +final class UnsafeShuffleInMemorySorter { + + private final Sorter<PackedRecordPointer, long[]> sorter; + private static final class SortComparator implements Comparator<PackedRecordPointer> { + @Override + public int compare(PackedRecordPointer left, PackedRecordPointer right) { + return left.getPartitionId() - right.getPartitionId(); + } + } + private static final SortComparator SORT_COMPARATOR = new SortComparator(); + + /** + * An array of record pointers and partition ids that have been encoded by + * {@link PackedRecordPointer}. The sort operates on this array instead of directly manipulating + * records. + */ + private long[] pointerArray; + + /** + * The position in the pointer array where new records can be inserted. + */ + private int pointerArrayInsertPosition = 0; + + public UnsafeShuffleInMemorySorter(int initialSize) { + assert (initialSize > 0); + this.pointerArray = new long[initialSize]; + this.sorter = new Sorter<PackedRecordPointer, long[]>(UnsafeShuffleSortDataFormat.INSTANCE); + } + + public void expandPointerArray() { + final long[] oldArray = pointerArray; + // Guard against overflow: + final int newLength = oldArray.length * 2 > 0 ? (oldArray.length * 2) : Integer.MAX_VALUE; + pointerArray = new long[newLength]; + System.arraycopy(oldArray, 0, pointerArray, 0, oldArray.length); + } + + public boolean hasSpaceForAnotherRecord() { + return pointerArrayInsertPosition + 1 < pointerArray.length; + } + + public long getMemoryUsage() { + return pointerArray.length * 8L; + } + + /** + * Inserts a record to be sorted. + * + * @param recordPointer a pointer to the record, encoded by the task memory manager. Due to + * certain pointer compression techniques used by the sorter, the sort can + * only operate on pointers that point to locations in the first + * {@link PackedRecordPointer#MAXIMUM_PAGE_SIZE_BYTES} bytes of a data page. + * @param partitionId the partition id, which must be less than or equal to + * {@link PackedRecordPointer#MAXIMUM_PARTITION_ID}. + */ + public void insertRecord(long recordPointer, int partitionId) { + if (!hasSpaceForAnotherRecord()) { + if (pointerArray.length == Integer.MAX_VALUE) { + throw new IllegalStateException("Sort pointer array has reached maximum size"); + } else { + expandPointerArray(); + } + } + pointerArray[pointerArrayInsertPosition] = + PackedRecordPointer.packPointer(recordPointer, partitionId); + pointerArrayInsertPosition++; + } + + /** + * An iterator-like class that's used instead of Java's Iterator in order to facilitate inlining. + */ + public static final class UnsafeShuffleSorterIterator { + + private final long[] pointerArray; + private final int numRecords; + final PackedRecordPointer packedRecordPointer = new PackedRecordPointer(); + private int position = 0; + + public UnsafeShuffleSorterIterator(int numRecords, long[] pointerArray) { + this.numRecords = numRecords; + this.pointerArray = pointerArray; + } + + public boolean hasNext() { + return position < numRecords; + } + + public void loadNext() { + packedRecordPointer.set(pointerArray[position]); + position++; + } + } + + /** + * Return an iterator over record pointers in sorted order. 
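+ * <p>
+ * A consumption sketch (this mirrors how UnsafeShuffleExternalSorter#writeSortedFile drains the
+ * iterator; the {@code sorter} variable is assumed to be an instance of this class):
+ * <pre>
+ *   UnsafeShuffleSorterIterator iter = sorter.getSortedIterator();
+ *   while (iter.hasNext()) {
+ *     iter.loadNext();
+ *     int partition = iter.packedRecordPointer.getPartitionId();      // non-decreasing
+ *     long recordPointer = iter.packedRecordPointer.getRecordPointer();
+ *   }
+ * </pre>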
+ */ + public UnsafeShuffleSorterIterator getSortedIterator() { + sorter.sort(pointerArray, 0, pointerArrayInsertPosition, SORT_COMPARATOR); + return new UnsafeShuffleSorterIterator(pointerArrayInsertPosition, pointerArray); + } +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleSortDataFormat.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleSortDataFormat.java new file mode 100644 index 0000000000000..a66d74ee44782 --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleSortDataFormat.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import org.apache.spark.util.collection.SortDataFormat; + +final class UnsafeShuffleSortDataFormat extends SortDataFormat<PackedRecordPointer, long[]> { + + public static final UnsafeShuffleSortDataFormat INSTANCE = new UnsafeShuffleSortDataFormat(); + + private UnsafeShuffleSortDataFormat() { } + + @Override + public PackedRecordPointer getKey(long[] data, int pos) { + // Since we re-use keys, this method shouldn't be called. + throw new UnsupportedOperationException(); + } + + @Override + public PackedRecordPointer newKey() { + return new PackedRecordPointer(); + } + + @Override + public PackedRecordPointer getKey(long[] data, int pos, PackedRecordPointer reuse) { + reuse.set(data[pos]); + return reuse; + } + + @Override + public void swap(long[] data, int pos0, int pos1) { + final long temp = data[pos0]; + data[pos0] = data[pos1]; + data[pos1] = temp; + } + + @Override + public void copyElement(long[] src, int srcPos, long[] dst, int dstPos) { + dst[dstPos] = src[srcPos]; + } + + @Override + public void copyRange(long[] src, int srcPos, long[] dst, int dstPos, int length) { + System.arraycopy(src, srcPos, dst, dstPos, length); + } + + @Override + public long[] allocate(int length) { + return new long[length]; + } + +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriter.java new file mode 100644 index 0000000000000..ad7eb04afcd8c --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriter.java @@ -0,0 +1,438 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.io.*; +import java.nio.channels.FileChannel; +import java.util.Iterator; +import javax.annotation.Nullable; + +import scala.Option; +import scala.Product2; +import scala.collection.JavaConversions; +import scala.reflect.ClassTag; +import scala.reflect.ClassTag$; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.io.ByteStreams; +import com.google.common.io.Closeables; +import com.google.common.io.Files; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.spark.*; +import org.apache.spark.annotation.Private; +import org.apache.spark.io.CompressionCodec; +import org.apache.spark.io.CompressionCodec$; +import org.apache.spark.io.LZFCompressionCodec; +import org.apache.spark.executor.ShuffleWriteMetrics; +import org.apache.spark.network.util.LimitedInputStream; +import org.apache.spark.scheduler.MapStatus; +import org.apache.spark.scheduler.MapStatus$; +import org.apache.spark.serializer.SerializationStream; +import org.apache.spark.serializer.Serializer; +import org.apache.spark.serializer.SerializerInstance; +import org.apache.spark.shuffle.IndexShuffleBlockResolver; +import org.apache.spark.shuffle.ShuffleMemoryManager; +import org.apache.spark.shuffle.ShuffleWriter; +import org.apache.spark.storage.BlockManager; +import org.apache.spark.storage.TimeTrackingOutputStream; +import org.apache.spark.unsafe.PlatformDependent; +import org.apache.spark.unsafe.memory.TaskMemoryManager; + +@Private +public class UnsafeShuffleWriter<K, V> extends ShuffleWriter<K, V> { + + private final Logger logger = LoggerFactory.getLogger(UnsafeShuffleWriter.class); + + private static final ClassTag<Object> OBJECT_CLASS_TAG = ClassTag$.MODULE$.Object(); + + @VisibleForTesting + static final int INITIAL_SORT_BUFFER_SIZE = 4096; + + private final BlockManager blockManager; + private final IndexShuffleBlockResolver shuffleBlockResolver; + private final TaskMemoryManager memoryManager; + private final ShuffleMemoryManager shuffleMemoryManager; + private final SerializerInstance serializer; + private final Partitioner partitioner; + private final ShuffleWriteMetrics writeMetrics; + private final int shuffleId; + private final int mapId; + private final TaskContext taskContext; + private final SparkConf sparkConf; + private final boolean transferToEnabled; + + private MapStatus mapStatus = null; + private UnsafeShuffleExternalSorter sorter = null; + + /** Subclass of ByteArrayOutputStream that exposes `buf` directly. */ + private static final class MyByteArrayOutputStream extends ByteArrayOutputStream { + public MyByteArrayOutputStream(int size) { super(size); } + public byte[] getBuf() { return buf; } + } + + private MyByteArrayOutputStream serBuffer; + private SerializationStream serOutputStream; + + /** + * Are we in the process of stopping? Because map tasks can call stop() with success = true + * and then call stop() with success = false if they get an exception, we want to make sure + * we don't try deleting files, etc twice. 
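+ * <p>
+ * In other words (a behavioural sketch of the {@code stop} implementation below):
+ * <pre>
+ *   writer.stop(true);   // first call: marks the writer as stopping and returns the map status
+ *   writer.stop(false);  // later call: sees stopping == true and returns None without deleting
+ *                        // the already-committed output
+ * </pre>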
+ */ + private boolean stopping = false; + + public UnsafeShuffleWriter( + BlockManager blockManager, + IndexShuffleBlockResolver shuffleBlockResolver, + TaskMemoryManager memoryManager, + ShuffleMemoryManager shuffleMemoryManager, + UnsafeShuffleHandle<K, V> handle, + int mapId, + TaskContext taskContext, + SparkConf sparkConf) throws IOException { + final int numPartitions = handle.dependency().partitioner().numPartitions(); + if (numPartitions > UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS()) { + throw new IllegalArgumentException( + "UnsafeShuffleWriter can only be used for shuffles with at most " + + UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS() + " reduce partitions"); + } + this.blockManager = blockManager; + this.shuffleBlockResolver = shuffleBlockResolver; + this.memoryManager = memoryManager; + this.shuffleMemoryManager = shuffleMemoryManager; + this.mapId = mapId; + final ShuffleDependency<K, V, V> dep = handle.dependency(); + this.shuffleId = dep.shuffleId(); + this.serializer = Serializer.getSerializer(dep.serializer()).newInstance(); + this.partitioner = dep.partitioner(); + this.writeMetrics = new ShuffleWriteMetrics(); + taskContext.taskMetrics().shuffleWriteMetrics_$eq(Option.apply(writeMetrics)); + this.taskContext = taskContext; + this.sparkConf = sparkConf; + this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true); + open(); + } + + /** + * This convenience method should only be called in test code. + */ + @VisibleForTesting + public void write(Iterator<Product2<K, V>> records) throws IOException { + write(JavaConversions.asScalaIterator(records)); + } + + @Override + public void write(scala.collection.Iterator<Product2<K, V>> records) throws IOException { + boolean success = false; + try { + while (records.hasNext()) { + insertRecordIntoSorter(records.next()); + } + closeAndWriteOutput(); + success = true; + } finally { + if (!success) { + sorter.cleanupAfterError(); + } + } + } + + private void open() throws IOException { + assert (sorter == null); + sorter = new UnsafeShuffleExternalSorter( + memoryManager, + shuffleMemoryManager, + blockManager, + taskContext, + INITIAL_SORT_BUFFER_SIZE, + partitioner.numPartitions(), + sparkConf, + writeMetrics); + serBuffer = new MyByteArrayOutputStream(1024 * 1024); + serOutputStream = serializer.serializeStream(serBuffer); + } + + @VisibleForTesting + void closeAndWriteOutput() throws IOException { + serBuffer = null; + serOutputStream = null; + final SpillInfo[] spills = sorter.closeAndGetSpills(); + sorter = null; + final long[] partitionLengths; + try { + partitionLengths = mergeSpills(spills); + } finally { + for (SpillInfo spill : spills) { + if (spill.file.exists() && ! 
spill.file.delete()) { + logger.error("Error while deleting spill file {}", spill.file.getPath()); + } + } + } + shuffleBlockResolver.writeIndexFile(shuffleId, mapId, partitionLengths); + mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths); + } + + @VisibleForTesting + void insertRecordIntoSorter(Product2<K, V> record) throws IOException { + final K key = record._1(); + final int partitionId = partitioner.getPartition(key); + serBuffer.reset(); + serOutputStream.writeKey(key, OBJECT_CLASS_TAG); + serOutputStream.writeValue(record._2(), OBJECT_CLASS_TAG); + serOutputStream.flush(); + + final int serializedRecordSize = serBuffer.size(); + assert (serializedRecordSize > 0); + + sorter.insertRecord( + serBuffer.getBuf(), PlatformDependent.BYTE_ARRAY_OFFSET, serializedRecordSize, partitionId); + } + + @VisibleForTesting + void forceSorterToSpill() throws IOException { + assert (sorter != null); + sorter.spill(); + } + + /** + * Merge zero or more spill files together, choosing the fastest merging strategy based on the + * number of spills and the IO compression codec. + * + * @return the partition lengths in the merged file. + */ + private long[] mergeSpills(SpillInfo[] spills) throws IOException { + final File outputFile = shuffleBlockResolver.getDataFile(shuffleId, mapId); + final boolean compressionEnabled = sparkConf.getBoolean("spark.shuffle.compress", true); + final CompressionCodec compressionCodec = CompressionCodec$.MODULE$.createCodec(sparkConf); + final boolean fastMergeEnabled = + sparkConf.getBoolean("spark.shuffle.unsafe.fastMergeEnabled", true); + final boolean fastMergeIsSupported = + !compressionEnabled || compressionCodec instanceof LZFCompressionCodec; + try { + if (spills.length == 0) { + new FileOutputStream(outputFile).close(); // Create an empty file + return new long[partitioner.numPartitions()]; + } else if (spills.length == 1) { + // Here, we don't need to perform any metrics updates because the bytes written to this + // output file would have already been counted as shuffle bytes written. + Files.move(spills[0].file, outputFile); + return spills[0].partitionLengths; + } else { + final long[] partitionLengths; + // There are multiple spills to merge, so none of these spill files' lengths were counted + // towards our shuffle write count or shuffle write time. If we use the slow merge path, + // then the final output file's size won't necessarily be equal to the sum of the spill + // files' sizes. To guard against this case, we look at the output file's actual size when + // computing shuffle bytes written. + // + // We allow the individual merge methods to report their own IO times since different merge + // strategies use different IO techniques. We count IO during merge towards the shuffle + // shuffle write time, which appears to be consistent with the "not bypassing merge-sort" + // branch in ExternalSorter. + if (fastMergeEnabled && fastMergeIsSupported) { + // Compression is disabled or we are using an IO compression codec that supports + // decompression of concatenated compressed streams, so we can perform a fast spill merge + // that doesn't need to interpret the spilled bytes. 
+ if (transferToEnabled) { + logger.debug("Using transferTo-based fast merge"); + partitionLengths = mergeSpillsWithTransferTo(spills, outputFile); + } else { + logger.debug("Using fileStream-based fast merge"); + partitionLengths = mergeSpillsWithFileStream(spills, outputFile, null); + } + } else { + logger.debug("Using slow merge"); + partitionLengths = mergeSpillsWithFileStream(spills, outputFile, compressionCodec); + } + // When closing an UnsafeShuffleExternalSorter that has already spilled once but also has + // in-memory records, we write out the in-memory records to a file but do not count that + // final write as bytes spilled (instead, it's accounted as shuffle write). The merge needs + // to be counted as shuffle write, but this will lead to double-counting of the final + // SpillInfo's bytes. + writeMetrics.decShuffleBytesWritten(spills[spills.length - 1].file.length()); + writeMetrics.incShuffleBytesWritten(outputFile.length()); + return partitionLengths; + } + } catch (IOException e) { + if (outputFile.exists() && !outputFile.delete()) { + logger.error("Unable to delete output file {}", outputFile.getPath()); + } + throw e; + } + } + + /** + * Merges spill files using Java FileStreams. This code path is slower than the NIO-based merge, + * {@link UnsafeShuffleWriter#mergeSpillsWithTransferTo(SpillInfo[], File)}, so it's only used in + * cases where the IO compression codec does not support concatenation of compressed data, or in + * cases where users have explicitly disabled use of {@code transferTo} in order to work around + * kernel bugs. + * + * @param spills the spills to merge. + * @param outputFile the file to write the merged data to. + * @param compressionCodec the IO compression codec, or null if shuffle compression is disabled. + * @return the partition lengths in the merged file. 
+ */ + private long[] mergeSpillsWithFileStream( + SpillInfo[] spills, + File outputFile, + @Nullable CompressionCodec compressionCodec) throws IOException { + assert (spills.length >= 2); + final int numPartitions = partitioner.numPartitions(); + final long[] partitionLengths = new long[numPartitions]; + final InputStream[] spillInputStreams = new FileInputStream[spills.length]; + OutputStream mergedFileOutputStream = null; + + boolean threwException = true; + try { + for (int i = 0; i < spills.length; i++) { + spillInputStreams[i] = new FileInputStream(spills[i].file); + } + for (int partition = 0; partition < numPartitions; partition++) { + final long initialFileLength = outputFile.length(); + mergedFileOutputStream = + new TimeTrackingOutputStream(writeMetrics, new FileOutputStream(outputFile, true)); + if (compressionCodec != null) { + mergedFileOutputStream = compressionCodec.compressedOutputStream(mergedFileOutputStream); + } + + for (int i = 0; i < spills.length; i++) { + final long partitionLengthInSpill = spills[i].partitionLengths[partition]; + if (partitionLengthInSpill > 0) { + InputStream partitionInputStream = + new LimitedInputStream(spillInputStreams[i], partitionLengthInSpill); + if (compressionCodec != null) { + partitionInputStream = compressionCodec.compressedInputStream(partitionInputStream); + } + ByteStreams.copy(partitionInputStream, mergedFileOutputStream); + } + } + mergedFileOutputStream.flush(); + mergedFileOutputStream.close(); + partitionLengths[partition] = (outputFile.length() - initialFileLength); + } + threwException = false; + } finally { + // To avoid masking exceptions that caused us to prematurely enter the finally block, only + // throw exceptions during cleanup if threwException == false. + for (InputStream stream : spillInputStreams) { + Closeables.close(stream, threwException); + } + Closeables.close(mergedFileOutputStream, threwException); + } + return partitionLengths; + } + + /** + * Merges spill files by using NIO's transferTo to concatenate spill partitions' bytes. + * This is only safe when the IO compression codec and serializer support concatenation of + * serialized streams. + * + * @return the partition lengths in the merged file. + */ + private long[] mergeSpillsWithTransferTo(SpillInfo[] spills, File outputFile) throws IOException { + assert (spills.length >= 2); + final int numPartitions = partitioner.numPartitions(); + final long[] partitionLengths = new long[numPartitions]; + final FileChannel[] spillInputChannels = new FileChannel[spills.length]; + final long[] spillInputChannelPositions = new long[spills.length]; + FileChannel mergedFileOutputChannel = null; + + boolean threwException = true; + try { + for (int i = 0; i < spills.length; i++) { + spillInputChannels[i] = new FileInputStream(spills[i].file).getChannel(); + } + // This file needs to opened in append mode in order to work around a Linux kernel bug that + // affects transferTo; see SPARK-3948 for more details. 
+ mergedFileOutputChannel = new FileOutputStream(outputFile, true).getChannel(); + + long bytesWrittenToMergedFile = 0; + for (int partition = 0; partition < numPartitions; partition++) { + for (int i = 0; i < spills.length; i++) { + final long partitionLengthInSpill = spills[i].partitionLengths[partition]; + long bytesToTransfer = partitionLengthInSpill; + final FileChannel spillInputChannel = spillInputChannels[i]; + final long writeStartTime = System.nanoTime(); + while (bytesToTransfer > 0) { + final long actualBytesTransferred = spillInputChannel.transferTo( + spillInputChannelPositions[i], + bytesToTransfer, + mergedFileOutputChannel); + spillInputChannelPositions[i] += actualBytesTransferred; + bytesToTransfer -= actualBytesTransferred; + } + writeMetrics.incShuffleWriteTime(System.nanoTime() - writeStartTime); + bytesWrittenToMergedFile += partitionLengthInSpill; + partitionLengths[partition] += partitionLengthInSpill; + } + } + // Check the position after transferTo loop to see if it is in the right position and raise an + // exception if it is incorrect. The position will not be increased to the expected length + // after calling transferTo in kernel version 2.6.32. This issue is described at + // https://bugs.openjdk.java.net/browse/JDK-7052359 and SPARK-3948. + if (mergedFileOutputChannel.position() != bytesWrittenToMergedFile) { + throw new IOException( + "Current position " + mergedFileOutputChannel.position() + " does not equal expected " + + "position " + bytesWrittenToMergedFile + " after transferTo. Please check your kernel" + + " version to see if it is 2.6.32, as there is a kernel bug which will lead to " + + "unexpected behavior when using transferTo. You can set spark.file.transferTo=false " + + "to disable this NIO feature." + ); + } + threwException = false; + } finally { + // To avoid masking exceptions that caused us to prematurely enter the finally block, only + // throw exceptions during cleanup if threwException == false. + for (int i = 0; i < spills.length; i++) { + assert(spillInputChannelPositions[i] == spills[i].file.length()); + Closeables.close(spillInputChannels[i], threwException); + } + Closeables.close(mergedFileOutputChannel, threwException); + } + return partitionLengths; + } + + @Override + public Option<MapStatus> stop(boolean success) { + try { + if (stopping) { + return Option.apply(null); + } else { + stopping = true; + if (success) { + if (mapStatus == null) { + throw new IllegalStateException("Cannot call stop(true) without having called write()"); + } + return Option.apply(mapStatus); + } else { + // The map task failed, so delete our output data. + shuffleBlockResolver.removeDataByMap(shuffleId, mapId); + return Option.apply(null); + } + } + } finally { + if (sorter != null) { + // If sorter is non-null, then this implies that we called stop() in response to an error, + // so we need to clean up memory and spill files created by the sorter + sorter.cleanupAfterError(); + } + } + } +} diff --git a/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java b/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java new file mode 100644 index 0000000000000..dc2aa30466cc6 --- /dev/null +++ b/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.storage; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.spark.annotation.Private; +import org.apache.spark.executor.ShuffleWriteMetrics; + +/** + * Intercepts write calls and tracks total time spent writing in order to update shuffle write + * metrics. Not thread safe. + */ +@Private +public final class TimeTrackingOutputStream extends OutputStream { + + private final ShuffleWriteMetrics writeMetrics; + private final OutputStream outputStream; + + public TimeTrackingOutputStream(ShuffleWriteMetrics writeMetrics, OutputStream outputStream) { + this.writeMetrics = writeMetrics; + this.outputStream = outputStream; + } + + @Override + public void write(int b) throws IOException { + final long startTime = System.nanoTime(); + outputStream.write(b); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } + + @Override + public void write(byte[] b) throws IOException { + final long startTime = System.nanoTime(); + outputStream.write(b); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + final long startTime = System.nanoTime(); + outputStream.write(b, off, len); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } + + @Override + public void flush() throws IOException { + final long startTime = System.nanoTime(); + outputStream.flush(); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } + + @Override + public void close() throws IOException { + final long startTime = System.nanoTime(); + outputStream.close(); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } +} diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index 0c4d28f786edd..a5d831c7e68ad 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -313,7 +313,8 @@ object SparkEnv extends Logging { // Let the user specify short names for shuffle managers val shortShuffleMgrNames = Map( "hash" -> "org.apache.spark.shuffle.hash.HashShuffleManager", - "sort" -> "org.apache.spark.shuffle.sort.SortShuffleManager") + "sort" -> "org.apache.spark.shuffle.sort.SortShuffleManager", + "tungsten-sort" -> "org.apache.spark.shuffle.unsafe.UnsafeShuffleManager") val shuffleMgrName = conf.get("spark.shuffle.manager", "sort") val shuffleMgrClass = shortShuffleMgrNames.getOrElse(shuffleMgrName.toLowerCase, shuffleMgrName) val shuffleManager = instantiateClass[ShuffleManager](shuffleMgrClass) diff --git a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala index dfbde7c8a1b0d..698d1384d580d 100644 --- a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala +++ 
b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala @@ -121,6 +121,8 @@ class JavaSerializer(conf: SparkConf) extends Serializer with Externalizable { private var counterReset = conf.getInt("spark.serializer.objectStreamReset", 100) private var extraDebugInfo = conf.getBoolean("spark.serializer.extraDebugInfo", true) + protected def this() = this(new SparkConf()) // For deserialization only + override def newInstance(): SerializerInstance = { val classLoader = defaultClassLoader.getOrElse(Thread.currentThread.getContextClassLoader) new JavaSerializerInstance(counterReset, extraDebugInfo, classLoader) diff --git a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala index 6ad427bcac7f9..6c3b3080d2605 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala @@ -76,7 +76,7 @@ private[spark] class FileShuffleBlockResolver(conf: SparkConf) private val consolidateShuffleFiles = conf.getBoolean("spark.shuffle.consolidateFiles", false) - // Use getSizeAsKb (not bytes) to maintain backwards compatibility of on units are provided + // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided private val bufferSize = conf.getSizeAsKb("spark.shuffle.file.buffer", "32k").toInt * 1024 /** diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala index f6e6fe5defe09..4cc4ef5f1886e 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala @@ -17,14 +17,17 @@ package org.apache.spark.shuffle +import java.io.IOException + import org.apache.spark.scheduler.MapStatus /** * Obtained inside a map task to write out records to the shuffle system. 
*/ -private[spark] trait ShuffleWriter[K, V] { +private[spark] abstract class ShuffleWriter[K, V] { /** Write a sequence of records to this task's output */ - def write(records: Iterator[_ <: Product2[K, V]]): Unit + @throws[IOException] + def write(records: Iterator[Product2[K, V]]): Unit /** Close this writer, passing along whether the map completed */ def stop(success: Boolean): Option[MapStatus] diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala index 897f0a5dc5bcc..eb87cee15903c 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala @@ -49,7 +49,7 @@ private[spark] class HashShuffleWriter[K, V]( writeMetrics) /** Write a bunch of records to this task's output */ - override def write(records: Iterator[_ <: Product2[K, V]]): Unit = { + override def write(records: Iterator[Product2[K, V]]): Unit = { val iter = if (dep.aggregator.isDefined) { if (dep.mapSideCombine) { dep.aggregator.get.combineValuesByKey(records, context) diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index 15842941daaab..d7fab351ca3b8 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -72,7 +72,7 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager true } - override def shuffleBlockResolver: IndexShuffleBlockResolver = { + override val shuffleBlockResolver: IndexShuffleBlockResolver = { indexShuffleBlockResolver } diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala index add2656294ca2..c9dd6bfc4c219 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala @@ -48,7 +48,7 @@ private[spark] class SortShuffleWriter[K, V, C]( context.taskMetrics.shuffleWriteMetrics = Some(writeMetrics) /** Write a bunch of records to this task's output */ - override def write(records: Iterator[_ <: Product2[K, V]]): Unit = { + override def write(records: Iterator[Product2[K, V]]): Unit = { if (dep.mapSideCombine) { require(dep.aggregator.isDefined, "Map-side combine without Aggregator specified!") sorter = new ExternalSorter[K, V, C]( diff --git a/core/src/main/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManager.scala new file mode 100644 index 0000000000000..f2bfef376d3ca --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManager.scala @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe + +import java.util.Collections +import java.util.concurrent.ConcurrentHashMap + +import org.apache.spark._ +import org.apache.spark.serializer.Serializer +import org.apache.spark.shuffle._ +import org.apache.spark.shuffle.sort.SortShuffleManager + +/** + * Subclass of [[BaseShuffleHandle]], used to identify when we've chosen to use the new shuffle. + */ +private[spark] class UnsafeShuffleHandle[K, V]( + shuffleId: Int, + numMaps: Int, + dependency: ShuffleDependency[K, V, V]) + extends BaseShuffleHandle(shuffleId, numMaps, dependency) { +} + +private[spark] object UnsafeShuffleManager extends Logging { + + /** + * The maximum number of shuffle output partitions that UnsafeShuffleManager supports. + */ + val MAX_SHUFFLE_OUTPUT_PARTITIONS = PackedRecordPointer.MAXIMUM_PARTITION_ID + 1 + + /** + * Helper method for determining whether a shuffle should use the optimized unsafe shuffle + * path or whether it should fall back to the original sort-based shuffle. + */ + def canUseUnsafeShuffle[K, V, C](dependency: ShuffleDependency[K, V, C]): Boolean = { + val shufId = dependency.shuffleId + val serializer = Serializer.getSerializer(dependency.serializer) + if (!serializer.supportsRelocationOfSerializedObjects) { + log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because the serializer, " + + s"${serializer.getClass.getName}, does not support object relocation") + false + } else if (dependency.aggregator.isDefined) { + log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because an aggregator is defined") + false + } else if (dependency.keyOrdering.isDefined) { + log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because a key ordering is defined") + false + } else if (dependency.partitioner.numPartitions > MAX_SHUFFLE_OUTPUT_PARTITIONS) { + log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because it has more than " + + s"$MAX_SHUFFLE_OUTPUT_PARTITIONS partitions") + false + } else { + log.debug(s"Can use UnsafeShuffle for shuffle $shufId") + true + } + } +} + +/** + * A shuffle implementation that uses directly-managed memory to implement several performance + * optimizations for certain types of shuffles. In cases where the new performance optimizations + * cannot be applied, this shuffle manager delegates to [[SortShuffleManager]] to handle those + * shuffles. + * + * UnsafeShuffleManager's optimizations will apply when _all_ of the following conditions hold: + * + * - The shuffle dependency specifies no aggregation or output ordering. + * - The shuffle serializer supports relocation of serialized values (this is currently supported + * by KryoSerializer and Spark SQL's custom serializers). + * - The shuffle produces fewer than 16777216 output partitions. + * - No individual record is larger than 128 MB when serialized. + * + * In addition, extra spill-merging optimizations are automatically applied when the shuffle + * compression codec supports concatenation of serialized streams. This is currently supported by + * Spark's LZF serializer. 
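+ *
+ * To opt in to this shuffle implementation, set the following (a configuration sketch: the
+ * `tungsten-sort` alias is registered in SparkEnv by this same patch, and KryoSerializer is one
+ * serializer that supports relocation of serialized objects):
+ *
+ *  - spark.shuffle.manager = tungsten-sort
+ *  - spark.serializer = org.apache.spark.serializer.KryoSerializer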
+ *
+ * At a high level, UnsafeShuffleManager's design is similar to Spark's existing SortShuffleManager.
+ * In sort-based shuffle, incoming records are sorted according to their target partition ids, then
+ * written to a single map output file. Reducers fetch contiguous regions of this file in order to
+ * read their portion of the map output. In cases where the map output data is too large to fit in
+ * memory, sorted subsets of the output are spilled to disk and those on-disk files are merged
+ * to produce the final output file.
+ *
+ * UnsafeShuffleManager optimizes this process in several ways:
+ *
+ * - Its sort operates on serialized binary data rather than Java objects, which reduces memory
+ *   consumption and GC overheads. This optimization requires the record serializer to have certain
+ *   properties to allow serialized records to be re-ordered without requiring deserialization.
+ *   See SPARK-4550, where this optimization was first proposed and implemented, for more details.
+ *
+ * - It uses a specialized cache-efficient sorter ([[UnsafeShuffleExternalSorter]]) that sorts
+ *   arrays of compressed record pointers and partition ids. By using only 8 bytes of space per
+ *   record in the sorting array, this fits more of the array into cache.
+ *
+ * - The spill merging procedure operates on blocks of serialized records that belong to the same
+ *   partition and does not need to deserialize records during the merge.
+ *
+ * - When the spill compression codec supports concatenation of compressed data, the spill merge
+ *   simply concatenates the serialized and compressed spill partitions to produce the final output
+ *   partition. This allows efficient data copying methods, like NIO's `transferTo`, to be used
+ *   and avoids the need to allocate decompression or copying buffers during the merge.
+ *
+ * For more details on UnsafeShuffleManager's design, see SPARK-7081.
+ */
+private[spark] class UnsafeShuffleManager(conf: SparkConf) extends ShuffleManager with Logging {
+
+  if (!conf.getBoolean("spark.shuffle.spill", true)) {
+    logWarning(
+      "spark.shuffle.spill was set to false, but this is ignored by the tungsten-sort shuffle " +
+      "manager; its optimized shuffles will continue to spill to disk when necessary.")
+  }
+
+  private[this] val sortShuffleManager: SortShuffleManager = new SortShuffleManager(conf)
+  private[this] val shufflesThatFellBackToSortShuffle =
+    Collections.newSetFromMap(new ConcurrentHashMap[Int, java.lang.Boolean]())
+  private[this] val numMapsForShufflesThatUsedNewPath = new ConcurrentHashMap[Int, Int]()
+
+  /**
+   * Register a shuffle with the manager and obtain a handle for it to pass to tasks.
+   */
+  override def registerShuffle[K, V, C](
+      shuffleId: Int,
+      numMaps: Int,
+      dependency: ShuffleDependency[K, V, C]): ShuffleHandle = {
+    if (UnsafeShuffleManager.canUseUnsafeShuffle(dependency)) {
+      new UnsafeShuffleHandle[K, V](
+        shuffleId, numMaps, dependency.asInstanceOf[ShuffleDependency[K, V, V]])
+    } else {
+      new BaseShuffleHandle(shuffleId, numMaps, dependency)
+    }
+  }
+
+  /**
+   * Get a reader for a range of reduce partitions (startPartition to endPartition-1, inclusive).
+   * Called on executors by reduce tasks.
+   */
+  override def getReader[K, C](
+      handle: ShuffleHandle,
+      startPartition: Int,
+      endPartition: Int,
+      context: TaskContext): ShuffleReader[K, C] = {
+    sortShuffleManager.getReader(handle, startPartition, endPartition, context)
+  }
+
+  /** Get a writer for a given partition. Called on executors by map tasks.
*/ + override def getWriter[K, V]( + handle: ShuffleHandle, + mapId: Int, + context: TaskContext): ShuffleWriter[K, V] = { + handle match { + case unsafeShuffleHandle: UnsafeShuffleHandle[K, V] => + numMapsForShufflesThatUsedNewPath.putIfAbsent(handle.shuffleId, unsafeShuffleHandle.numMaps) + val env = SparkEnv.get + new UnsafeShuffleWriter( + env.blockManager, + shuffleBlockResolver.asInstanceOf[IndexShuffleBlockResolver], + context.taskMemoryManager(), + env.shuffleMemoryManager, + unsafeShuffleHandle, + mapId, + context, + env.conf) + case other => + shufflesThatFellBackToSortShuffle.add(handle.shuffleId) + sortShuffleManager.getWriter(handle, mapId, context) + } + } + + /** Remove a shuffle's metadata from the ShuffleManager. */ + override def unregisterShuffle(shuffleId: Int): Boolean = { + if (shufflesThatFellBackToSortShuffle.remove(shuffleId)) { + sortShuffleManager.unregisterShuffle(shuffleId) + } else { + Option(numMapsForShufflesThatUsedNewPath.remove(shuffleId)).foreach { numMaps => + (0 until numMaps).foreach { mapId => + shuffleBlockResolver.removeDataByMap(shuffleId, mapId) + } + } + true + } + } + + override val shuffleBlockResolver: IndexShuffleBlockResolver = { + sortShuffleManager.shuffleBlockResolver + } + + /** Shut down this ShuffleManager. */ + override def stop(): Unit = { + sortShuffleManager.stop() + } +} diff --git a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala index 8bc4e205bc3c6..a33f22ef52687 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala @@ -86,16 +86,6 @@ private[spark] class DiskBlockObjectWriter( extends BlockObjectWriter(blockId) with Logging { - /** Intercepts write calls and tracks total time spent writing. Not thread safe. */ - private class TimeTrackingOutputStream(out: OutputStream) extends OutputStream { - override def write(i: Int): Unit = callWithTiming(out.write(i)) - override def write(b: Array[Byte]): Unit = callWithTiming(out.write(b)) - override def write(b: Array[Byte], off: Int, len: Int): Unit = { - callWithTiming(out.write(b, off, len)) - } - override def close(): Unit = out.close() - override def flush(): Unit = out.flush() - } /** The file channel, used for repositioning / truncating the file. */ private var channel: FileChannel = null @@ -136,7 +126,7 @@ private[spark] class DiskBlockObjectWriter( throw new IllegalStateException("Writer already closed. 
Cannot be reopened.") } fos = new FileOutputStream(file, true) - ts = new TimeTrackingOutputStream(fos) + ts = new TimeTrackingOutputStream(writeMetrics, fos) channel = fos.getChannel() bs = compressStream(new BufferedOutputStream(ts, bufferSize)) objOut = serializerInstance.serializeStream(bs) @@ -150,9 +140,9 @@ private[spark] class DiskBlockObjectWriter( if (syncWrites) { // Force outstanding writes to disk and track how long it takes objOut.flush() - callWithTiming { - fos.getFD.sync() - } + val start = System.nanoTime() + fos.getFD.sync() + writeMetrics.incShuffleWriteTime(System.nanoTime() - start) } } { objOut.close() @@ -251,12 +241,6 @@ private[spark] class DiskBlockObjectWriter( reportedPosition = pos } - private def callWithTiming(f: => Unit) = { - val start = System.nanoTime() - f - writeMetrics.incShuffleWriteTime(System.nanoTime() - start) - } - // For testing private[spark] override def flush() { objOut.flush() diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala index b850973145077..df2d6ad3b41a4 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala @@ -90,7 +90,7 @@ class ExternalAppendOnlyMap[K, V, C]( // Number of bytes spilled in total private var _diskBytesSpilled = 0L - // Use getSizeAsKb (not bytes) to maintain backwards compatibility of on units are provided + // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided private val fileBufferSize = sparkConf.getSizeAsKb("spark.shuffle.file.buffer", "32k").toInt * 1024 diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala index 7d5cf7b61e56a..3b9d14f9372b6 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala @@ -110,7 +110,7 @@ private[spark] class ExternalSorter[K, V, C]( private val conf = SparkEnv.get.conf private val spillingEnabled = conf.getBoolean("spark.shuffle.spill", true) - // Use getSizeAsKb (not bytes) to maintain backwards compatibility of on units are provided + // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided private val fileBufferSize = conf.getSizeAsKb("spark.shuffle.file.buffer", "32k").toInt * 1024 private val transferToEnabled = conf.getBoolean("spark.file.transferTo", true) diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/PackedRecordPointerSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/PackedRecordPointerSuite.java new file mode 100644 index 0000000000000..db9e82759090a --- /dev/null +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/PackedRecordPointerSuite.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import org.junit.Test; +import static org.junit.Assert.*; + +import org.apache.spark.unsafe.memory.ExecutorMemoryManager; +import org.apache.spark.unsafe.memory.MemoryAllocator; +import org.apache.spark.unsafe.memory.MemoryBlock; +import org.apache.spark.unsafe.memory.TaskMemoryManager; +import static org.apache.spark.shuffle.unsafe.PackedRecordPointer.*; + +public class PackedRecordPointerSuite { + + @Test + public void heap() { + final TaskMemoryManager memoryManager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.HEAP)); + final MemoryBlock page0 = memoryManager.allocatePage(100); + final MemoryBlock page1 = memoryManager.allocatePage(100); + final long addressInPage1 = memoryManager.encodePageNumberAndOffset(page1, + page1.getBaseOffset() + 42); + PackedRecordPointer packedPointer = new PackedRecordPointer(); + packedPointer.set(PackedRecordPointer.packPointer(addressInPage1, 360)); + assertEquals(360, packedPointer.getPartitionId()); + final long recordPointer = packedPointer.getRecordPointer(); + assertEquals(1, TaskMemoryManager.decodePageNumber(recordPointer)); + assertEquals(page1.getBaseOffset() + 42, memoryManager.getOffsetInPage(recordPointer)); + assertEquals(addressInPage1, recordPointer); + memoryManager.cleanUpAllAllocatedMemory(); + } + + @Test + public void offHeap() { + final TaskMemoryManager memoryManager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.UNSAFE)); + final MemoryBlock page0 = memoryManager.allocatePage(100); + final MemoryBlock page1 = memoryManager.allocatePage(100); + final long addressInPage1 = memoryManager.encodePageNumberAndOffset(page1, + page1.getBaseOffset() + 42); + PackedRecordPointer packedPointer = new PackedRecordPointer(); + packedPointer.set(PackedRecordPointer.packPointer(addressInPage1, 360)); + assertEquals(360, packedPointer.getPartitionId()); + final long recordPointer = packedPointer.getRecordPointer(); + assertEquals(1, TaskMemoryManager.decodePageNumber(recordPointer)); + assertEquals(page1.getBaseOffset() + 42, memoryManager.getOffsetInPage(recordPointer)); + assertEquals(addressInPage1, recordPointer); + memoryManager.cleanUpAllAllocatedMemory(); + } + + @Test + public void maximumPartitionIdCanBeEncoded() { + PackedRecordPointer packedPointer = new PackedRecordPointer(); + packedPointer.set(PackedRecordPointer.packPointer(0, MAXIMUM_PARTITION_ID)); + assertEquals(MAXIMUM_PARTITION_ID, packedPointer.getPartitionId()); + } + + @Test + public void partitionIdsGreaterThanMaximumPartitionIdWillOverflowOrTriggerError() { + PackedRecordPointer packedPointer = new PackedRecordPointer(); + try { + // Pointers greater than the maximum partition ID will overflow or trigger an assertion error + packedPointer.set(PackedRecordPointer.packPointer(0, MAXIMUM_PARTITION_ID + 1)); + assertFalse(MAXIMUM_PARTITION_ID + 1 == packedPointer.getPartitionId()); + } catch (AssertionError e ) { + // pass + } + } + + @Test + public void maximumOffsetInPageCanBeEncoded() { + PackedRecordPointer packedPointer = new PackedRecordPointer(); + long address = 
TaskMemoryManager.encodePageNumberAndOffset(0, MAXIMUM_PAGE_SIZE_BYTES - 1); + packedPointer.set(PackedRecordPointer.packPointer(address, 0)); + assertEquals(address, packedPointer.getRecordPointer()); + } + + @Test + public void offsetsPastMaxOffsetInPageWillOverflow() { + PackedRecordPointer packedPointer = new PackedRecordPointer(); + long address = TaskMemoryManager.encodePageNumberAndOffset(0, MAXIMUM_PAGE_SIZE_BYTES); + packedPointer.set(PackedRecordPointer.packPointer(address, 0)); + assertEquals(0, packedPointer.getRecordPointer()); + } +} diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorterSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorterSuite.java new file mode 100644 index 0000000000000..8fa72597db24d --- /dev/null +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorterSuite.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.util.Arrays; +import java.util.Random; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.spark.HashPartitioner; +import org.apache.spark.unsafe.PlatformDependent; +import org.apache.spark.unsafe.memory.ExecutorMemoryManager; +import org.apache.spark.unsafe.memory.MemoryAllocator; +import org.apache.spark.unsafe.memory.MemoryBlock; +import org.apache.spark.unsafe.memory.TaskMemoryManager; + +public class UnsafeShuffleInMemorySorterSuite { + + private static String getStringFromDataPage(Object baseObject, long baseOffset, int strLength) { + final byte[] strBytes = new byte[strLength]; + PlatformDependent.copyMemory( + baseObject, + baseOffset, + strBytes, + PlatformDependent.BYTE_ARRAY_OFFSET, strLength); + return new String(strBytes); + } + + @Test + public void testSortingEmptyInput() { + final UnsafeShuffleInMemorySorter sorter = new UnsafeShuffleInMemorySorter(100); + final UnsafeShuffleInMemorySorter.UnsafeShuffleSorterIterator iter = sorter.getSortedIterator(); + assert(!iter.hasNext()); + } + + @Test + public void testBasicSorting() throws Exception { + final String[] dataToSort = new String[] { + "Boba", + "Pearls", + "Tapioca", + "Taho", + "Condensed Milk", + "Jasmine", + "Milk Tea", + "Lychee", + "Mango" + }; + final TaskMemoryManager memoryManager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.HEAP)); + final MemoryBlock dataPage = memoryManager.allocatePage(2048); + final Object baseObject = dataPage.getBaseObject(); + final UnsafeShuffleInMemorySorter sorter = new UnsafeShuffleInMemorySorter(4); + final HashPartitioner hashPartitioner = new HashPartitioner(4); + + // Write the records into the data page and store pointers into the sorter + long position = 
dataPage.getBaseOffset(); + for (String str : dataToSort) { + final long recordAddress = memoryManager.encodePageNumberAndOffset(dataPage, position); + final byte[] strBytes = str.getBytes("utf-8"); + PlatformDependent.UNSAFE.putInt(baseObject, position, strBytes.length); + position += 4; + PlatformDependent.copyMemory( + strBytes, + PlatformDependent.BYTE_ARRAY_OFFSET, + baseObject, + position, + strBytes.length); + position += strBytes.length; + sorter.insertRecord(recordAddress, hashPartitioner.getPartition(str)); + } + + // Sort the records + final UnsafeShuffleInMemorySorter.UnsafeShuffleSorterIterator iter = sorter.getSortedIterator(); + int prevPartitionId = -1; + Arrays.sort(dataToSort); + for (int i = 0; i < dataToSort.length; i++) { + Assert.assertTrue(iter.hasNext()); + iter.loadNext(); + final int partitionId = iter.packedRecordPointer.getPartitionId(); + Assert.assertTrue(partitionId >= 0 && partitionId <= 3); + Assert.assertTrue("Partition id " + partitionId + " should be >= prev id " + prevPartitionId, + partitionId >= prevPartitionId); + final long recordAddress = iter.packedRecordPointer.getRecordPointer(); + final int recordLength = PlatformDependent.UNSAFE.getInt( + memoryManager.getPage(recordAddress), memoryManager.getOffsetInPage(recordAddress)); + final String str = getStringFromDataPage( + memoryManager.getPage(recordAddress), + memoryManager.getOffsetInPage(recordAddress) + 4, // skip over record length + recordLength); + Assert.assertTrue(Arrays.binarySearch(dataToSort, str) != -1); + } + Assert.assertFalse(iter.hasNext()); + } + + @Test + public void testSortingManyNumbers() throws Exception { + UnsafeShuffleInMemorySorter sorter = new UnsafeShuffleInMemorySorter(4); + int[] numbersToSort = new int[128000]; + Random random = new Random(16); + for (int i = 0; i < numbersToSort.length; i++) { + numbersToSort[i] = random.nextInt(PackedRecordPointer.MAXIMUM_PARTITION_ID + 1); + sorter.insertRecord(0, numbersToSort[i]); + } + Arrays.sort(numbersToSort); + int[] sorterResult = new int[numbersToSort.length]; + UnsafeShuffleInMemorySorter.UnsafeShuffleSorterIterator iter = sorter.getSortedIterator(); + int j = 0; + while (iter.hasNext()) { + iter.loadNext(); + sorterResult[j] = iter.packedRecordPointer.getPartitionId(); + j += 1; + } + Assert.assertArrayEquals(numbersToSort, sorterResult); + } +} diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java new file mode 100644 index 0000000000000..730d265c87f88 --- /dev/null +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java @@ -0,0 +1,527 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.shuffle.unsafe; + +import java.io.*; +import java.nio.ByteBuffer; +import java.util.*; + +import scala.*; +import scala.collection.Iterator; +import scala.reflect.ClassTag; +import scala.runtime.AbstractFunction1; + +import com.google.common.collect.HashMultiset; +import com.google.common.io.ByteStreams; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThan; +import static org.junit.Assert.*; +import static org.mockito.AdditionalAnswers.returnsFirstArg; +import static org.mockito.Answers.RETURNS_SMART_NULLS; +import static org.mockito.Mockito.*; + +import org.apache.spark.*; +import org.apache.spark.io.CompressionCodec$; +import org.apache.spark.io.LZ4CompressionCodec; +import org.apache.spark.io.LZFCompressionCodec; +import org.apache.spark.io.SnappyCompressionCodec; +import org.apache.spark.executor.ShuffleWriteMetrics; +import org.apache.spark.executor.TaskMetrics; +import org.apache.spark.network.util.LimitedInputStream; +import org.apache.spark.serializer.*; +import org.apache.spark.scheduler.MapStatus; +import org.apache.spark.shuffle.IndexShuffleBlockResolver; +import org.apache.spark.shuffle.ShuffleMemoryManager; +import org.apache.spark.storage.*; +import org.apache.spark.unsafe.memory.ExecutorMemoryManager; +import org.apache.spark.unsafe.memory.MemoryAllocator; +import org.apache.spark.unsafe.memory.TaskMemoryManager; +import org.apache.spark.util.Utils; + +public class UnsafeShuffleWriterSuite { + + static final int NUM_PARTITITONS = 4; + final TaskMemoryManager taskMemoryManager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.HEAP)); + final HashPartitioner hashPartitioner = new HashPartitioner(NUM_PARTITITONS); + File mergedOutputFile; + File tempDir; + long[] partitionSizesInMergedFile; + final LinkedList<File> spillFilesCreated = new LinkedList<File>(); + SparkConf conf; + final Serializer serializer = new KryoSerializer(new SparkConf()); + TaskMetrics taskMetrics; + + @Mock(answer = RETURNS_SMART_NULLS) ShuffleMemoryManager shuffleMemoryManager; + @Mock(answer = RETURNS_SMART_NULLS) BlockManager blockManager; + @Mock(answer = RETURNS_SMART_NULLS) IndexShuffleBlockResolver shuffleBlockResolver; + @Mock(answer = RETURNS_SMART_NULLS) DiskBlockManager diskBlockManager; + @Mock(answer = RETURNS_SMART_NULLS) TaskContext taskContext; + @Mock(answer = RETURNS_SMART_NULLS) ShuffleDependency<Object, Object, Object> shuffleDep; + + private final class CompressStream extends AbstractFunction1<OutputStream, OutputStream> { + @Override + public OutputStream apply(OutputStream stream) { + if (conf.getBoolean("spark.shuffle.compress", true)) { + return CompressionCodec$.MODULE$.createCodec(conf).compressedOutputStream(stream); + } else { + return stream; + } + } + } + + @After + public void tearDown() { + Utils.deleteRecursively(tempDir); + final long leakedMemory = taskMemoryManager.cleanUpAllAllocatedMemory(); + if (leakedMemory != 0) { + fail("Test leaked " + leakedMemory + " bytes of managed memory"); + } + } + + @Before + @SuppressWarnings("unchecked") + public void setUp() throws IOException { + MockitoAnnotations.initMocks(this); + tempDir = Utils.createTempDir("test", "test"); + mergedOutputFile = 
File.createTempFile("mergedoutput", "", tempDir); + partitionSizesInMergedFile = null; + spillFilesCreated.clear(); + conf = new SparkConf(); + taskMetrics = new TaskMetrics(); + + when(shuffleMemoryManager.tryToAcquire(anyLong())).then(returnsFirstArg()); + + when(blockManager.diskBlockManager()).thenReturn(diskBlockManager); + when(blockManager.getDiskWriter( + any(BlockId.class), + any(File.class), + any(SerializerInstance.class), + anyInt(), + any(ShuffleWriteMetrics.class))).thenAnswer(new Answer<DiskBlockObjectWriter>() { + @Override + public DiskBlockObjectWriter answer(InvocationOnMock invocationOnMock) throws Throwable { + Object[] args = invocationOnMock.getArguments(); + + return new DiskBlockObjectWriter( + (BlockId) args[0], + (File) args[1], + (SerializerInstance) args[2], + (Integer) args[3], + new CompressStream(), + false, + (ShuffleWriteMetrics) args[4] + ); + } + }); + when(blockManager.wrapForCompression(any(BlockId.class), any(InputStream.class))).thenAnswer( + new Answer<InputStream>() { + @Override + public InputStream answer(InvocationOnMock invocation) throws Throwable { + assert (invocation.getArguments()[0] instanceof TempShuffleBlockId); + InputStream is = (InputStream) invocation.getArguments()[1]; + if (conf.getBoolean("spark.shuffle.compress", true)) { + return CompressionCodec$.MODULE$.createCodec(conf).compressedInputStream(is); + } else { + return is; + } + } + } + ); + + when(blockManager.wrapForCompression(any(BlockId.class), any(OutputStream.class))).thenAnswer( + new Answer<OutputStream>() { + @Override + public OutputStream answer(InvocationOnMock invocation) throws Throwable { + assert (invocation.getArguments()[0] instanceof TempShuffleBlockId); + OutputStream os = (OutputStream) invocation.getArguments()[1]; + if (conf.getBoolean("spark.shuffle.compress", true)) { + return CompressionCodec$.MODULE$.createCodec(conf).compressedOutputStream(os); + } else { + return os; + } + } + } + ); + + when(shuffleBlockResolver.getDataFile(anyInt(), anyInt())).thenReturn(mergedOutputFile); + doAnswer(new Answer<Void>() { + @Override + public Void answer(InvocationOnMock invocationOnMock) throws Throwable { + partitionSizesInMergedFile = (long[]) invocationOnMock.getArguments()[2]; + return null; + } + }).when(shuffleBlockResolver).writeIndexFile(anyInt(), anyInt(), any(long[].class)); + + when(diskBlockManager.createTempShuffleBlock()).thenAnswer( + new Answer<Tuple2<TempShuffleBlockId, File>>() { + @Override + public Tuple2<TempShuffleBlockId, File> answer( + InvocationOnMock invocationOnMock) throws Throwable { + TempShuffleBlockId blockId = new TempShuffleBlockId(UUID.randomUUID()); + File file = File.createTempFile("spillFile", ".spill", tempDir); + spillFilesCreated.add(file); + return Tuple2$.MODULE$.apply(blockId, file); + } + }); + + when(taskContext.taskMetrics()).thenReturn(taskMetrics); + + when(shuffleDep.serializer()).thenReturn(Option.<Serializer>apply(serializer)); + when(shuffleDep.partitioner()).thenReturn(hashPartitioner); + } + + private UnsafeShuffleWriter<Object, Object> createWriter( + boolean transferToEnabled) throws IOException { + conf.set("spark.file.transferTo", String.valueOf(transferToEnabled)); + return new UnsafeShuffleWriter<Object, Object>( + blockManager, + shuffleBlockResolver, + taskMemoryManager, + shuffleMemoryManager, + new UnsafeShuffleHandle<Object, Object>(0, 1, shuffleDep), + 0, // map id + taskContext, + conf + ); + } + + private void assertSpillFilesWereCleanedUp() { + for (File spillFile : spillFilesCreated) { + 
assertFalse("Spill file " + spillFile.getPath() + " was not cleaned up", + spillFile.exists()); + } + } + + private List<Tuple2<Object, Object>> readRecordsFromFile() throws IOException { + final ArrayList<Tuple2<Object, Object>> recordsList = new ArrayList<Tuple2<Object, Object>>(); + long startOffset = 0; + for (int i = 0; i < NUM_PARTITITONS; i++) { + final long partitionSize = partitionSizesInMergedFile[i]; + if (partitionSize > 0) { + InputStream in = new FileInputStream(mergedOutputFile); + ByteStreams.skipFully(in, startOffset); + in = new LimitedInputStream(in, partitionSize); + if (conf.getBoolean("spark.shuffle.compress", true)) { + in = CompressionCodec$.MODULE$.createCodec(conf).compressedInputStream(in); + } + DeserializationStream recordsStream = serializer.newInstance().deserializeStream(in); + Iterator<Tuple2<Object, Object>> records = recordsStream.asKeyValueIterator(); + while (records.hasNext()) { + Tuple2<Object, Object> record = records.next(); + assertEquals(i, hashPartitioner.getPartition(record._1())); + recordsList.add(record); + } + recordsStream.close(); + startOffset += partitionSize; + } + } + return recordsList; + } + + @Test(expected=IllegalStateException.class) + public void mustCallWriteBeforeSuccessfulStop() throws IOException { + createWriter(false).stop(true); + } + + @Test + public void doNotNeedToCallWriteBeforeUnsuccessfulStop() throws IOException { + createWriter(false).stop(false); + } + + @Test + public void writeEmptyIterator() throws Exception { + final UnsafeShuffleWriter<Object, Object> writer = createWriter(true); + writer.write(Collections.<Product2<Object, Object>>emptyIterator()); + final Option<MapStatus> mapStatus = writer.stop(true); + assertTrue(mapStatus.isDefined()); + assertTrue(mergedOutputFile.exists()); + assertArrayEquals(new long[NUM_PARTITITONS], partitionSizesInMergedFile); + assertEquals(0, taskMetrics.shuffleWriteMetrics().get().shuffleRecordsWritten()); + assertEquals(0, taskMetrics.shuffleWriteMetrics().get().shuffleBytesWritten()); + assertEquals(0, taskMetrics.diskBytesSpilled()); + assertEquals(0, taskMetrics.memoryBytesSpilled()); + } + + @Test + public void writeWithoutSpilling() throws Exception { + // In this example, each partition should have exactly one record: + final ArrayList<Product2<Object, Object>> dataToWrite = + new ArrayList<Product2<Object, Object>>(); + for (int i = 0; i < NUM_PARTITITONS; i++) { + dataToWrite.add(new Tuple2<Object, Object>(i, i)); + } + final UnsafeShuffleWriter<Object, Object> writer = createWriter(true); + writer.write(dataToWrite.iterator()); + final Option<MapStatus> mapStatus = writer.stop(true); + assertTrue(mapStatus.isDefined()); + assertTrue(mergedOutputFile.exists()); + + long sumOfPartitionSizes = 0; + for (long size: partitionSizesInMergedFile) { + // All partitions should be the same size: + assertEquals(partitionSizesInMergedFile[0], size); + sumOfPartitionSizes += size; + } + assertEquals(mergedOutputFile.length(), sumOfPartitionSizes); + assertEquals( + HashMultiset.create(dataToWrite), + HashMultiset.create(readRecordsFromFile())); + assertSpillFilesWereCleanedUp(); + ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics().get(); + assertEquals(dataToWrite.size(), shuffleWriteMetrics.shuffleRecordsWritten()); + assertEquals(0, taskMetrics.diskBytesSpilled()); + assertEquals(0, taskMetrics.memoryBytesSpilled()); + assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.shuffleBytesWritten()); + } + + private void testMergingSpills( + boolean 
transferToEnabled, + String compressionCodecName) throws IOException { + if (compressionCodecName != null) { + conf.set("spark.shuffle.compress", "true"); + conf.set("spark.io.compression.codec", compressionCodecName); + } else { + conf.set("spark.shuffle.compress", "false"); + } + final UnsafeShuffleWriter<Object, Object> writer = createWriter(transferToEnabled); + final ArrayList<Product2<Object, Object>> dataToWrite = + new ArrayList<Product2<Object, Object>>(); + for (int i : new int[] { 1, 2, 3, 4, 4, 2 }) { + dataToWrite.add(new Tuple2<Object, Object>(i, i)); + } + writer.insertRecordIntoSorter(dataToWrite.get(0)); + writer.insertRecordIntoSorter(dataToWrite.get(1)); + writer.insertRecordIntoSorter(dataToWrite.get(2)); + writer.insertRecordIntoSorter(dataToWrite.get(3)); + writer.forceSorterToSpill(); + writer.insertRecordIntoSorter(dataToWrite.get(4)); + writer.insertRecordIntoSorter(dataToWrite.get(5)); + writer.closeAndWriteOutput(); + final Option<MapStatus> mapStatus = writer.stop(true); + assertTrue(mapStatus.isDefined()); + assertTrue(mergedOutputFile.exists()); + assertEquals(2, spillFilesCreated.size()); + + long sumOfPartitionSizes = 0; + for (long size: partitionSizesInMergedFile) { + sumOfPartitionSizes += size; + } + assertEquals(sumOfPartitionSizes, mergedOutputFile.length()); + + assertEquals( + HashMultiset.create(dataToWrite), + HashMultiset.create(readRecordsFromFile())); + assertSpillFilesWereCleanedUp(); + ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics().get(); + assertEquals(dataToWrite.size(), shuffleWriteMetrics.shuffleRecordsWritten()); + assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L)); + assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length())); + assertThat(taskMetrics.memoryBytesSpilled(), greaterThan(0L)); + assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.shuffleBytesWritten()); + } + + @Test + public void mergeSpillsWithTransferToAndLZF() throws Exception { + testMergingSpills(true, LZFCompressionCodec.class.getName()); + } + + @Test + public void mergeSpillsWithFileStreamAndLZF() throws Exception { + testMergingSpills(false, LZFCompressionCodec.class.getName()); + } + + @Test + public void mergeSpillsWithTransferToAndLZ4() throws Exception { + testMergingSpills(true, LZ4CompressionCodec.class.getName()); + } + + @Test + public void mergeSpillsWithFileStreamAndLZ4() throws Exception { + testMergingSpills(false, LZ4CompressionCodec.class.getName()); + } + + @Test + public void mergeSpillsWithTransferToAndSnappy() throws Exception { + testMergingSpills(true, SnappyCompressionCodec.class.getName()); + } + + @Test + public void mergeSpillsWithFileStreamAndSnappy() throws Exception { + testMergingSpills(false, SnappyCompressionCodec.class.getName()); + } + + @Test + public void mergeSpillsWithTransferToAndNoCompression() throws Exception { + testMergingSpills(true, null); + } + + @Test + public void mergeSpillsWithFileStreamAndNoCompression() throws Exception { + testMergingSpills(false, null); + } + + @Test + public void writeEnoughDataToTriggerSpill() throws Exception { + when(shuffleMemoryManager.tryToAcquire(anyLong())) + .then(returnsFirstArg()) // Allocate initial sort buffer + .then(returnsFirstArg()) // Allocate initial data page + .thenReturn(0L) // Deny request to allocate new data page + .then(returnsFirstArg()); // Grant new sort buffer and data page. 
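The stubbing above scripts consecutive answers for tryToAcquire so that the third allocation request is denied, which is what forces the writer to spill. A minimal standalone Scala sketch of the same Mockito pattern is shown below; the MemoryGrantor trait and the byte amounts are invented purely for illustration.

    import org.mockito.AdditionalAnswers.returnsFirstArg
    import org.mockito.Matchers.anyLong
    import org.mockito.Mockito.{mock, when}

    trait MemoryGrantor { def tryToAcquire(numBytes: Long): Long }

    val grantor = mock(classOf[MemoryGrantor])
    when(grantor.tryToAcquire(anyLong()))
      .then(returnsFirstArg[Long]())   // first request granted in full
      .then(returnsFirstArg[Long]())   // second request granted in full
      .thenReturn(0L)                  // third request denied, forcing a spill
      .then(returnsFirstArg[Long]())   // later requests granted again

    assert(grantor.tryToAcquire(64L) == 64L)  // granted
    assert(grantor.tryToAcquire(32L) == 32L)  // granted
    assert(grantor.tryToAcquire(16L) == 0L)   // denied, so the caller must spill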
+ final UnsafeShuffleWriter<Object, Object> writer = createWriter(false); + final ArrayList<Product2<Object, Object>> dataToWrite = new ArrayList<Product2<Object, Object>>(); + final byte[] bigByteArray = new byte[PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES / 128]; + for (int i = 0; i < 128 + 1; i++) { + dataToWrite.add(new Tuple2<Object, Object>(i, bigByteArray)); + } + writer.write(dataToWrite.iterator()); + verify(shuffleMemoryManager, times(5)).tryToAcquire(anyLong()); + assertEquals(2, spillFilesCreated.size()); + writer.stop(true); + readRecordsFromFile(); + assertSpillFilesWereCleanedUp(); + ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics().get(); + assertEquals(dataToWrite.size(), shuffleWriteMetrics.shuffleRecordsWritten()); + assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L)); + assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length())); + assertThat(taskMetrics.memoryBytesSpilled(), greaterThan(0L)); + assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.shuffleBytesWritten()); + } + + @Test + public void writeEnoughRecordsToTriggerSortBufferExpansionAndSpill() throws Exception { + when(shuffleMemoryManager.tryToAcquire(anyLong())) + .then(returnsFirstArg()) // Allocate initial sort buffer + .then(returnsFirstArg()) // Allocate initial data page + .thenReturn(0L) // Deny request to grow sort buffer + .then(returnsFirstArg()); // Grant new sort buffer and data page. + final UnsafeShuffleWriter<Object, Object> writer = createWriter(false); + final ArrayList<Product2<Object, Object>> dataToWrite = new ArrayList<Product2<Object, Object>>(); + for (int i = 0; i < UnsafeShuffleWriter.INITIAL_SORT_BUFFER_SIZE; i++) { + dataToWrite.add(new Tuple2<Object, Object>(i, i)); + } + writer.write(dataToWrite.iterator()); + verify(shuffleMemoryManager, times(5)).tryToAcquire(anyLong()); + assertEquals(2, spillFilesCreated.size()); + writer.stop(true); + readRecordsFromFile(); + assertSpillFilesWereCleanedUp(); + ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics().get(); + assertEquals(dataToWrite.size(), shuffleWriteMetrics.shuffleRecordsWritten()); + assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L)); + assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length())); + assertThat(taskMetrics.memoryBytesSpilled(), greaterThan(0L)); + assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.shuffleBytesWritten()); + } + + @Test + public void writeRecordsThatAreBiggerThanDiskWriteBufferSize() throws Exception { + final UnsafeShuffleWriter<Object, Object> writer = createWriter(false); + final ArrayList<Product2<Object, Object>> dataToWrite = + new ArrayList<Product2<Object, Object>>(); + final byte[] bytes = new byte[(int) (UnsafeShuffleExternalSorter.DISK_WRITE_BUFFER_SIZE * 2.5)]; + new Random(42).nextBytes(bytes); + dataToWrite.add(new Tuple2<Object, Object>(1, ByteBuffer.wrap(bytes))); + writer.write(dataToWrite.iterator()); + writer.stop(true); + assertEquals( + HashMultiset.create(dataToWrite), + HashMultiset.create(readRecordsFromFile())); + assertSpillFilesWereCleanedUp(); + } + + @Test + public void writeRecordsThatAreBiggerThanMaxRecordSize() throws Exception { + // Use a custom serializer so that we have exact control over the size of serialized data. 
+ final Serializer byteArraySerializer = new Serializer() { + @Override + public SerializerInstance newInstance() { + return new SerializerInstance() { + @Override + public SerializationStream serializeStream(final OutputStream s) { + return new SerializationStream() { + @Override + public void flush() { } + + @Override + public <T> SerializationStream writeObject(T t, ClassTag<T> ev1) { + byte[] bytes = (byte[]) t; + try { + s.write(bytes); + } catch (IOException e) { + throw new RuntimeException(e); + } + return this; + } + + @Override + public void close() { } + }; + } + public <T> ByteBuffer serialize(T t, ClassTag<T> ev1) { return null; } + public DeserializationStream deserializeStream(InputStream s) { return null; } + public <T> T deserialize(ByteBuffer b, ClassLoader l, ClassTag<T> ev1) { return null; } + public <T> T deserialize(ByteBuffer bytes, ClassTag<T> ev1) { return null; } + }; + } + }; + when(shuffleDep.serializer()).thenReturn(Option.<Serializer>apply(byteArraySerializer)); + final UnsafeShuffleWriter<Object, Object> writer = createWriter(false); + // Insert a record and force a spill so that there's something to clean up: + writer.insertRecordIntoSorter(new Tuple2<Object, Object>(new byte[1], new byte[1])); + writer.forceSorterToSpill(); + // We should be able to write a record that's right _at_ the max record size + final byte[] atMaxRecordSize = new byte[UnsafeShuffleExternalSorter.MAX_RECORD_SIZE]; + new Random(42).nextBytes(atMaxRecordSize); + writer.insertRecordIntoSorter(new Tuple2<Object, Object>(new byte[0], atMaxRecordSize)); + writer.forceSorterToSpill(); + // Inserting a record that's larger than the max record size should fail: + final byte[] exceedsMaxRecordSize = new byte[UnsafeShuffleExternalSorter.MAX_RECORD_SIZE + 1]; + new Random(42).nextBytes(exceedsMaxRecordSize); + Product2<Object, Object> hugeRecord = + new Tuple2<Object, Object>(new byte[0], exceedsMaxRecordSize); + try { + // Here, we write through the public `write()` interface instead of the test-only + // `insertRecordIntoSorter` interface: + writer.write(Collections.singletonList(hugeRecord).iterator()); + fail("Expected exception to be thrown"); + } catch (IOException e) { + // Pass + } + assertSpillFilesWereCleanedUp(); + } + + @Test + public void spillFilesAreDeletedWhenStoppingAfterError() throws IOException { + final UnsafeShuffleWriter<Object, Object> writer = createWriter(false); + writer.insertRecordIntoSorter(new Tuple2<Object, Object>(1, 1)); + writer.insertRecordIntoSorter(new Tuple2<Object, Object>(2, 2)); + writer.forceSorterToSpill(); + writer.insertRecordIntoSorter(new Tuple2<Object, Object>(2, 2)); + writer.stop(false); + assertSpillFilesWereCleanedUp(); + } +} diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala index 8c6035fb367fe..cf6a143537889 100644 --- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala +++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream} +import com.google.common.io.ByteStreams import org.scalatest.FunSuite import org.apache.spark.SparkConf @@ -62,6 +63,14 @@ class CompressionCodecSuite extends FunSuite { testCodec(codec) } + test("lz4 does not support concatenation of serialized streams") { + val codec = CompressionCodec.createCodec(conf, classOf[LZ4CompressionCodec].getName) + assert(codec.getClass === 
classOf[LZ4CompressionCodec]) + intercept[Exception] { + testConcatenationOfSerializedStreams(codec) + } + } + test("lzf compression codec") { val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName) assert(codec.getClass === classOf[LZFCompressionCodec]) @@ -74,6 +83,12 @@ class CompressionCodecSuite extends FunSuite { testCodec(codec) } + test("lzf supports concatenation of serialized streams") { + val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName) + assert(codec.getClass === classOf[LZFCompressionCodec]) + testConcatenationOfSerializedStreams(codec) + } + test("snappy compression codec") { val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName) assert(codec.getClass === classOf[SnappyCompressionCodec]) @@ -86,9 +101,38 @@ class CompressionCodecSuite extends FunSuite { testCodec(codec) } + test("snappy does not support concatenation of serialized streams") { + val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName) + assert(codec.getClass === classOf[SnappyCompressionCodec]) + intercept[Exception] { + testConcatenationOfSerializedStreams(codec) + } + } + test("bad compression codec") { intercept[IllegalArgumentException] { CompressionCodec.createCodec(conf, "foobar") } } + + private def testConcatenationOfSerializedStreams(codec: CompressionCodec): Unit = { + val bytes1: Array[Byte] = { + val baos = new ByteArrayOutputStream() + val out = codec.compressedOutputStream(baos) + (0 to 64).foreach(out.write) + out.close() + baos.toByteArray + } + val bytes2: Array[Byte] = { + val baos = new ByteArrayOutputStream() + val out = codec.compressedOutputStream(baos) + (65 to 127).foreach(out.write) + out.close() + baos.toByteArray + } + val concatenatedBytes = codec.compressedInputStream(new ByteArrayInputStream(bytes1 ++ bytes2)) + val decompressed: Array[Byte] = new Array[Byte](128) + ByteStreams.readFully(concatenatedBytes, decompressed) + assert(decompressed.toSeq === (0 to 127)) + } } diff --git a/core/src/test/scala/org/apache/spark/serializer/JavaSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/JavaSerializerSuite.scala new file mode 100644 index 0000000000000..ed4d8ce632e16 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/serializer/JavaSerializerSuite.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.serializer + +import org.apache.spark.SparkConf +import org.scalatest.FunSuite + +class JavaSerializerSuite extends FunSuite { + test("JavaSerializer instances are serializable") { + val serializer = new JavaSerializer(new SparkConf()) + val instance = serializer.newInstance() + instance.deserialize[JavaSerializer](instance.serialize(serializer)) + } +} diff --git a/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManagerSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManagerSuite.scala new file mode 100644 index 0000000000000..49a04a2a45280 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManagerSuite.scala @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe + +import org.mockito.Mockito._ +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.{FunSuite, Matchers} + +import org.apache.spark._ +import org.apache.spark.serializer.{JavaSerializer, KryoSerializer, Serializer} + +/** + * Tests for the fallback logic in UnsafeShuffleManager. Actual tests of shuffling data are + * performed in other suites. 
+ */ +class UnsafeShuffleManagerSuite extends FunSuite with Matchers { + + import UnsafeShuffleManager.canUseUnsafeShuffle + + private class RuntimeExceptionAnswer extends Answer[Object] { + override def answer(invocation: InvocationOnMock): Object = { + throw new RuntimeException("Called non-stubbed method, " + invocation.getMethod.getName) + } + } + + private def shuffleDep( + partitioner: Partitioner, + serializer: Option[Serializer], + keyOrdering: Option[Ordering[Any]], + aggregator: Option[Aggregator[Any, Any, Any]], + mapSideCombine: Boolean): ShuffleDependency[Any, Any, Any] = { + val dep = mock(classOf[ShuffleDependency[Any, Any, Any]], new RuntimeExceptionAnswer()) + doReturn(0).when(dep).shuffleId + doReturn(partitioner).when(dep).partitioner + doReturn(serializer).when(dep).serializer + doReturn(keyOrdering).when(dep).keyOrdering + doReturn(aggregator).when(dep).aggregator + doReturn(mapSideCombine).when(dep).mapSideCombine + dep + } + + test("supported shuffle dependencies") { + val kryo = Some(new KryoSerializer(new SparkConf())) + + assert(canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = kryo, + keyOrdering = None, + aggregator = None, + mapSideCombine = false + ))) + + val rangePartitioner = mock(classOf[RangePartitioner[Any, Any]]) + when(rangePartitioner.numPartitions).thenReturn(2) + assert(canUseUnsafeShuffle(shuffleDep( + partitioner = rangePartitioner, + serializer = kryo, + keyOrdering = None, + aggregator = None, + mapSideCombine = false + ))) + + } + + test("unsupported shuffle dependencies") { + val kryo = Some(new KryoSerializer(new SparkConf())) + val java = Some(new JavaSerializer(new SparkConf())) + + // We only support serializers that support object relocation + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = java, + keyOrdering = None, + aggregator = None, + mapSideCombine = false + ))) + + // We do not support shuffles with more than 16 million output partitions + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS + 1), + serializer = kryo, + keyOrdering = None, + aggregator = None, + mapSideCombine = false + ))) + + // We do not support shuffles that perform any kind of aggregation or sorting of keys + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = kryo, + keyOrdering = Some(mock(classOf[Ordering[Any]])), + aggregator = None, + mapSideCombine = false + ))) + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = kryo, + keyOrdering = None, + aggregator = Some(mock(classOf[Aggregator[Any, Any, Any]])), + mapSideCombine = false + ))) + // We do not support shuffles that perform any kind of aggregation or sorting of keys + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = kryo, + keyOrdering = Some(mock(classOf[Ordering[Any]])), + aggregator = Some(mock(classOf[Aggregator[Any, Any, Any]])), + mapSideCombine = true + ))) + } + +} diff --git a/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleSuite.scala new file mode 100644 index 0000000000000..6351539e91e97 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleSuite.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe + +import java.io.File + +import scala.collection.JavaConverters._ + +import org.apache.commons.io.FileUtils +import org.apache.commons.io.filefilter.TrueFileFilter +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.{HashPartitioner, ShuffleDependency, SparkContext, ShuffleSuite} +import org.apache.spark.rdd.ShuffledRDD +import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} +import org.apache.spark.util.Utils + +class UnsafeShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { + + // This test suite should run all tests in ShuffleSuite with unsafe-based shuffle. + + override def beforeAll() { + conf.set("spark.shuffle.manager", "tungsten-sort") + // UnsafeShuffleManager requires at least 128 MB of memory per task in order to be able to sort + // shuffle records. + conf.set("spark.shuffle.memoryFraction", "0.5") + } + + test("UnsafeShuffleManager properly cleans up files for shuffles that use the new shuffle path") { + val tmpDir = Utils.createTempDir() + try { + val myConf = conf.clone() + .set("spark.local.dir", tmpDir.getAbsolutePath) + sc = new SparkContext("local", "test", myConf) + // Create a shuffled RDD and verify that it will actually use the new UnsafeShuffle path + val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) + val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) + .setSerializer(new KryoSerializer(myConf)) + val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] + assert(UnsafeShuffleManager.canUseUnsafeShuffle(shuffleDep)) + def getAllFiles: Set[File] = + FileUtils.listFiles(tmpDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet + val filesBeforeShuffle = getAllFiles + // Force the shuffle to be performed + shuffledRdd.count() + // Ensure that the shuffle actually created files that will need to be cleaned up + val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle + filesCreatedByShuffle.map(_.getName) should be + Set("shuffle_0_0_0.data", "shuffle_0_0_0.index") + // Check that the cleanup actually removes the files + sc.env.blockManager.master.removeShuffle(0, blocking = true) + for (file <- filesCreatedByShuffle) { + assert (!file.exists(), s"Shuffle file $file was not cleaned up") + } + } finally { + Utils.deleteRecursively(tmpDir) + } + } + + test("UnsafeShuffleManager properly cleans up files for shuffles that use the old shuffle path") { + val tmpDir = Utils.createTempDir() + try { + val myConf = conf.clone() + .set("spark.local.dir", tmpDir.getAbsolutePath) + sc = new SparkContext("local", "test", myConf) + // Create a shuffled RDD and verify that it will actually use the old SortShuffle path + val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) + val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, 
new HashPartitioner(4)) + .setSerializer(new JavaSerializer(myConf)) + val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] + assert(!UnsafeShuffleManager.canUseUnsafeShuffle(shuffleDep)) + def getAllFiles: Set[File] = + FileUtils.listFiles(tmpDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet + val filesBeforeShuffle = getAllFiles + // Force the shuffle to be performed + shuffledRdd.count() + // Ensure that the shuffle actually created files that will need to be cleaned up + val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle + filesCreatedByShuffle.map(_.getName) should be + Set("shuffle_0_0_0.data", "shuffle_0_0_0.index") + // Check that the cleanup actually removes the files + sc.env.blockManager.master.removeShuffle(0, blocking = true) + for (file <- filesCreatedByShuffle) { + assert (!file.exists(), s"Shuffle file $file was not cleaned up") + } + } finally { + Utils.deleteRecursively(tmpDir) + } + } +} diff --git a/pom.xml b/pom.xml index cf9279ea5a2a6..564a443466e5a 100644 --- a/pom.xml +++ b/pom.xml @@ -669,7 +669,7 @@ <dependency> <groupId>org.mockito</groupId> <artifactId>mockito-all</artifactId> - <version>1.9.0</version> + <version>1.9.5</version> <scope>test</scope> </dependency> <dependency> @@ -684,6 +684,18 @@ <version>4.10</version> <scope>test</scope> </dependency> + <dependency> + <groupId>org.hamcrest</groupId> + <artifactId>hamcrest-core</artifactId> + <version>1.3</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.hamcrest</groupId> + <artifactId>hamcrest-library</artifactId> + <version>1.3</version> + <scope>test</scope> + </dependency> <dependency> <groupId>com.novocode</groupId> <artifactId>junit-interface</artifactId> diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index fba7290dcb0b5..487062a31f77f 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -131,6 +131,12 @@ object MimaExcludes { // SPARK-7530 Added StreamingContext.getState() ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.streaming.StreamingContext.state_=") + ) ++ Seq( + // SPARK-7081 changed ShuffleWriter from a trait to an abstract class and removed some + // unnecessary type bounds in order to fix some compiler warnings that occurred when + // implementing this interface in Java. Note that ShuffleWriter is private[spark]. 
+ ProblemFilters.exclude[IncompatibleTemplateDefProblem]( + "org.apache.spark.shuffle.ShuffleWriter") ) case v if v.startsWith("1.3") => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala index c3d2c7019a54a..3e46596ecf6ac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala @@ -17,17 +17,18 @@ package org.apache.spark.sql.execution -import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.{HashPartitioner, Partitioner, RangePartitioner, SparkEnv} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.{RDD, ShuffledRDD} import org.apache.spark.serializer.Serializer -import org.apache.spark.sql.{SQLContext, Row} +import org.apache.spark.shuffle.sort.SortShuffleManager +import org.apache.spark.shuffle.unsafe.UnsafeShuffleManager import org.apache.spark.sql.catalyst.errors.attachTree import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.{SQLContext, Row} import org.apache.spark.util.MutablePair object Exchange { @@ -85,7 +86,9 @@ case class Exchange( // corner-cases where a partitioner constructed with `numPartitions` partitions may output // fewer partitions (like RangePartitioner, for example). val conf = child.sqlContext.sparkContext.conf - val sortBasedShuffleOn = SparkEnv.get.shuffleManager.isInstanceOf[SortShuffleManager] + val shuffleManager = SparkEnv.get.shuffleManager + val sortBasedShuffleOn = shuffleManager.isInstanceOf[SortShuffleManager] || + shuffleManager.isInstanceOf[UnsafeShuffleManager] val bypassMergeThreshold = conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) val serializeMapOutputs = conf.getBoolean("spark.shuffle.sort.serializeMapOutputs", true) if (newOrdering.nonEmpty) { @@ -93,11 +96,11 @@ case class Exchange( // which requires a defensive copy. true } else if (sortBasedShuffleOn) { - // Spark's sort-based shuffle also uses `ExternalSorter` to buffer records in memory. - // However, there are two special cases where we can avoid the copy, described below: - if (partitioner.numPartitions <= bypassMergeThreshold) { - // If the number of output partitions is sufficiently small, then Spark will fall back to - // the old hash-based shuffle write path which doesn't buffer deserialized records. + val bypassIsSupported = SparkEnv.get.shuffleManager.isInstanceOf[SortShuffleManager] + if (bypassIsSupported && partitioner.numPartitions <= bypassMergeThreshold) { + // If we're using the original SortShuffleManager and the number of output partitions is + // sufficiently small, then Spark will fall back to the hash-based shuffle write path, which + // doesn't buffer deserialized records. // Note that we'll have to remove this case if we fix SPARK-6026 and remove this bypass. false } else if (serializeMapOutputs && serializer.supportsRelocationOfSerializedObjects) { @@ -105,9 +108,14 @@ case class Exchange( // them. This optimization is guarded by a feature-flag and is only applied in cases where // shuffle dependency does not specify an ordering and the record serializer has certain // properties. If this optimization is enabled, we can safely avoid the copy. 
+ // + // This optimization also applies to UnsafeShuffleManager (added in SPARK-7081). false } else { - // None of the special cases held, so we must copy. + // Spark's SortShuffleManager uses `ExternalSorter` to buffer records in memory. This code + // path is used both when SortShuffleManager is used and when UnsafeShuffleManager falls + // back to SortShuffleManager to perform a shuffle that the new fast path can't handle. In + // both cases, we must copy. true } } else { diff --git a/unsafe/pom.xml b/unsafe/pom.xml index 5b0733206b2bc..9e151fc7a9141 100644 --- a/unsafe/pom.xml +++ b/unsafe/pom.xml @@ -42,6 +42,10 @@ <groupId>com.google.code.findbugs</groupId> <artifactId>jsr305</artifactId> </dependency> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </dependency> <!-- Provided dependencies --> <dependency> diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java index 9224988e6ad69..2906ac8abad1a 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java @@ -19,6 +19,7 @@ import java.util.*; +import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,10 +48,18 @@ public final class TaskMemoryManager { private final Logger logger = LoggerFactory.getLogger(TaskMemoryManager.class); - /** - * The number of entries in the page table. - */ - private static final int PAGE_TABLE_SIZE = 1 << 13; + /** The number of bits used to address the page table. */ + private static final int PAGE_NUMBER_BITS = 13; + + /** The number of bits used to encode offsets in data pages. */ + @VisibleForTesting + static final int OFFSET_BITS = 64 - PAGE_NUMBER_BITS; // 51 + + /** The number of entries in the page table. */ + private static final int PAGE_TABLE_SIZE = 1 << PAGE_NUMBER_BITS; + + /** Maximum supported data page size */ + private static final long MAXIMUM_PAGE_SIZE = (1L << OFFSET_BITS); /** Bit mask for the lower 51 bits of a long. */ private static final long MASK_LONG_LOWER_51_BITS = 0x7FFFFFFFFFFFFL; @@ -101,11 +110,9 @@ public TaskMemoryManager(ExecutorMemoryManager executorMemoryManager) { * intended for allocating large blocks of memory that will be shared between operators. */ public MemoryBlock allocatePage(long size) { - if (logger.isTraceEnabled()) { - logger.trace("Allocating {} byte page", size); - } - if (size >= (1L << 51)) { - throw new IllegalArgumentException("Cannot allocate a page with more than 2^51 bytes"); + if (size > MAXIMUM_PAGE_SIZE) { + throw new IllegalArgumentException( + "Cannot allocate a page with more than " + MAXIMUM_PAGE_SIZE + " bytes"); } final int pageNumber; @@ -120,8 +127,8 @@ public MemoryBlock allocatePage(long size) { final MemoryBlock page = executorMemoryManager.allocate(size); page.pageNumber = pageNumber; pageTable[pageNumber] = page; - if (logger.isDebugEnabled()) { - logger.debug("Allocate page number {} ({} bytes)", pageNumber, size); + if (logger.isTraceEnabled()) { + logger.trace("Allocate page number {} ({} bytes)", pageNumber, size); } return page; } @@ -130,9 +137,6 @@ public MemoryBlock allocatePage(long size) { * Free a block of memory allocated via {@link TaskMemoryManager#allocatePage(long)}. 
*/ public void freePage(MemoryBlock page) { - if (logger.isTraceEnabled()) { - logger.trace("Freeing page number {} ({} bytes)", page.pageNumber, page.size()); - } assert (page.pageNumber != -1) : "Called freePage() on memory that wasn't allocated with allocatePage()"; executorMemoryManager.free(page); @@ -140,8 +144,8 @@ public void freePage(MemoryBlock page) { allocatedPages.clear(page.pageNumber); } pageTable[page.pageNumber] = null; - if (logger.isDebugEnabled()) { - logger.debug("Freed page number {} ({} bytes)", page.pageNumber, page.size()); + if (logger.isTraceEnabled()) { + logger.trace("Freed page number {} ({} bytes)", page.pageNumber, page.size()); } } @@ -173,14 +177,36 @@ public void free(MemoryBlock memory) { /** * Given a memory page and offset within that page, encode this address into a 64-bit long. * This address will remain valid as long as the corresponding page has not been freed. + * + * @param page a data page allocated by {@link TaskMemoryManager#allocate(long)}. + * @param offsetInPage an offset in this page which incorporates the base offset. In other words, + * this should be the value that you would pass as the base offset into an + * UNSAFE call (e.g. page.baseOffset() + something). + * @return an encoded page address. */ public long encodePageNumberAndOffset(MemoryBlock page, long offsetInPage) { - if (inHeap) { - assert (page.pageNumber != -1) : "encodePageNumberAndOffset called with invalid page"; - return (((long) page.pageNumber) << 51) | (offsetInPage & MASK_LONG_LOWER_51_BITS); - } else { - return offsetInPage; + if (!inHeap) { + // In off-heap mode, an offset is an absolute address that may require a full 64 bits to + // encode. Due to our page size limitation, though, we can convert this into an offset that's + // relative to the page's base offset; this relative offset will fit in 51 bits. + offsetInPage -= page.getBaseOffset(); } + return encodePageNumberAndOffset(page.pageNumber, offsetInPage); + } + + @VisibleForTesting + public static long encodePageNumberAndOffset(int pageNumber, long offsetInPage) { + assert (pageNumber != -1) : "encodePageNumberAndOffset called with invalid page"; + return (((long) pageNumber) << OFFSET_BITS) | (offsetInPage & MASK_LONG_LOWER_51_BITS); + } + + @VisibleForTesting + public static int decodePageNumber(long pagePlusOffsetAddress) { + return (int) ((pagePlusOffsetAddress & MASK_LONG_UPPER_13_BITS) >>> OFFSET_BITS); + } + + private static long decodeOffset(long pagePlusOffsetAddress) { + return (pagePlusOffsetAddress & MASK_LONG_LOWER_51_BITS); } /** @@ -189,7 +215,7 @@ public long encodePageNumberAndOffset(MemoryBlock page, long offsetInPage) { */ public Object getPage(long pagePlusOffsetAddress) { if (inHeap) { - final int pageNumber = (int) ((pagePlusOffsetAddress & MASK_LONG_UPPER_13_BITS) >>> 51); + final int pageNumber = decodePageNumber(pagePlusOffsetAddress); assert (pageNumber >= 0 && pageNumber < PAGE_TABLE_SIZE); final Object page = pageTable[pageNumber].getBaseObject(); assert (page != null); @@ -204,10 +230,15 @@ public Object getPage(long pagePlusOffsetAddress) { * {@link TaskMemoryManager#encodePageNumberAndOffset(MemoryBlock, long)} */ public long getOffsetInPage(long pagePlusOffsetAddress) { + final long offsetInPage = decodeOffset(pagePlusOffsetAddress); if (inHeap) { - return (pagePlusOffsetAddress & MASK_LONG_LOWER_51_BITS); + return offsetInPage; } else { - return pagePlusOffsetAddress; + // In off-heap mode, an offset is an absolute address. 
In encodePageNumberAndOffset, we + // converted the absolute address into a relative address. Here, we invert that operation: + final int pageNumber = decodePageNumber(pagePlusOffsetAddress); + assert (pageNumber >= 0 && pageNumber < PAGE_TABLE_SIZE); + return pageTable[pageNumber].getBaseOffset() + offsetInPage; } } diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/memory/TaskMemoryManagerSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/memory/TaskMemoryManagerSuite.java index 932882f1ca248..06fb081183659 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/memory/TaskMemoryManagerSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/memory/TaskMemoryManagerSuite.java @@ -38,4 +38,27 @@ public void leakedPageMemoryIsDetected() { Assert.assertEquals(4096, manager.cleanUpAllAllocatedMemory()); } + @Test + public void encodePageNumberAndOffsetOffHeap() { + final TaskMemoryManager manager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.UNSAFE)); + final MemoryBlock dataPage = manager.allocatePage(256); + // In off-heap mode, an offset is an absolute address that may require more than 51 bits to + // encode. This test exercises that corner-case: + final long offset = ((1L << TaskMemoryManager.OFFSET_BITS) + 10); + final long encodedAddress = manager.encodePageNumberAndOffset(dataPage, offset); + Assert.assertEquals(null, manager.getPage(encodedAddress)); + Assert.assertEquals(offset, manager.getOffsetInPage(encodedAddress)); + } + + @Test + public void encodePageNumberAndOffsetOnHeap() { + final TaskMemoryManager manager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.HEAP)); + final MemoryBlock dataPage = manager.allocatePage(256); + final long encodedAddress = manager.encodePageNumberAndOffset(dataPage, 64); + Assert.assertEquals(dataPage.getBaseObject(), manager.getPage(encodedAddress)); + Assert.assertEquals(64, manager.getOffsetInPage(encodedAddress)); + } + } From 59aaa1dad6bee06e38ee5c03bdf82354242286ee Mon Sep 17 00:00:00 2001 From: Venkata Ramana Gollamudi <ramana.gollamudi@huawei.com> Date: Wed, 13 May 2015 17:24:04 -0700 Subject: [PATCH 159/320] [SPARK-7601] [SQL] Support Insert into JDBC Datasource Supported InsertableRelation for JDBC Datasource JDBCRelation. 
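The relation now mixes in InsertableRelation, so an INSERT INTO / INSERT OVERWRITE on the registered temporary table ends up invoking the relation's insert(), which simply forwards to the existing DataFrame JDBC writer. A minimal sketch of the new code path (imports omitted; `jdbcRelation` stands for the JDBCRelation instance backing testram1 and is only illustrative):

  // Inside JDBCRelation, which now also extends InsertableRelation:
  override def insert(data: DataFrame, overwrite: Boolean): Unit = {
    data.insertIntoJDBC(url, table, overwrite, properties)
  }

  // So `INSERT OVERWRITE TABLE testram1 SELECT * FROM testsrc` is roughly equivalent to:
  jdbcRelation.insert(sqlContext.table("testsrc"), overwrite = true)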
Example usage: sqlContext.sql( s""" |CREATE TEMPORARY TABLE testram1 |USING org.apache.spark.sql.jdbc |OPTIONS (url '$url', dbtable 'testram1', user 'xx', password 'xx', driver 'com.h2.Driver') """.stripMargin.replaceAll("\n", " ")) sqlContext.sql("insert into table testram1 select * from testsrc") sqlContext.sql("insert overwrite table testram1 select * from testsrc") Author: Venkata Ramana Gollamudi <ramana.gollamudi@huawei.com> Closes #6121 from gvramana/JDBCDatasource_insert and squashes the following commits: f3fb5f1 [Venkata Ramana Gollamudi] Support for JDBC Datasource InsertableRelation --- .../apache/spark/sql/jdbc/JDBCRelation.scala | 8 +++- .../spark/sql/jdbc/JDBCWriteSuite.scala | 37 ++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala index d6b3fb3291a2e..93e82549f213b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala @@ -24,6 +24,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.Partition import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.sources._ @@ -129,7 +130,8 @@ private[sql] case class JDBCRelation( parts: Array[Partition], properties: Properties = new Properties())(@transient val sqlContext: SQLContext) extends BaseRelation - with PrunedFilteredScan { + with PrunedFilteredScan + with InsertableRelation { override val needConversion: Boolean = false @@ -148,4 +150,8 @@ private[sql] case class JDBCRelation( filters, parts) } + + override def insert(data: DataFrame, overwrite: Boolean): Unit = { + data.insertIntoJDBC(url, table, overwrite, properties) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index f3ce8e66460e5..0800eded443de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -43,6 +43,29 @@ class JDBCWriteSuite extends FunSuite with BeforeAndAfter { conn1 = DriverManager.getConnection(url1, properties) conn1.prepareStatement("create schema test").executeUpdate() + conn1.prepareStatement("drop table if exists test.people").executeUpdate() + conn1.prepareStatement( + "create table test.people (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate() + conn1.prepareStatement("insert into test.people values ('fred', 1)").executeUpdate() + conn1.prepareStatement("insert into test.people values ('mary', 2)").executeUpdate() + conn1.prepareStatement("drop table if exists test.people1").executeUpdate() + conn1.prepareStatement( + "create table test.people1 (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate() + conn1.commit() + + TestSQLContext.sql( + s""" + |CREATE TEMPORARY TABLE PEOPLE + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url1', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass') + """.stripMargin.replaceAll("\n", " ")) + + TestSQLContext.sql( + s""" + |CREATE TEMPORARY TABLE PEOPLE1 + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url1', dbtable 'TEST.PEOPLE1', user 'testUser', password 'testPass') + """.stripMargin.replaceAll("\n", " ")) } after { @@ -114,5 
+137,17 @@ class JDBCWriteSuite extends FunSuite with BeforeAndAfter { df2.insertIntoJDBC(url, "TEST.INCOMPATIBLETEST", true) } } - + + test("INSERT to JDBC Datasource") { + TestSQLContext.sql("INSERT INTO TABLE PEOPLE1 SELECT * FROM PEOPLE") + assert(2 == TestSQLContext.jdbc(url1, "TEST.PEOPLE1", properties).count) + assert(2 == TestSQLContext.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length) + } + + test("INSERT to JDBC Datasource with overwrite") { + TestSQLContext.sql("INSERT INTO TABLE PEOPLE1 SELECT * FROM PEOPLE") + TestSQLContext.sql("INSERT OVERWRITE TABLE PEOPLE1 SELECT * FROM PEOPLE") + assert(2 == TestSQLContext.jdbc(url1, "TEST.PEOPLE1", properties).count) + assert(2 == TestSQLContext.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length) + } } From bce00dac403d3be2be59218b7b93a56c34c68f1a Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Wed, 13 May 2015 17:33:15 -0700 Subject: [PATCH 160/320] [SPARK-6752] [STREAMING] [REVISED] Allow StreamingContext to be recreated from checkpoint and existing SparkContext This is a revision of the earlier version (see #5773) that passed the active SparkContext explicitly through a new set of Java and Scala API. The drawbacks are. * Hard to implement in python. * New API introduced. This is even more confusing since we are introducing getActiveOrCreate in SPARK-7553 Furthermore, there is now a direct way get an existing active SparkContext or create a new on - SparkContext.getOrCreate(conf). Its better to use this to get the SparkContext rather than have a new API to explicitly pass the context. So in this PR I have * Removed the new versions of StreamingContext.getOrCreate() which took SparkContext * Added the ability to pick up existing SparkContext when the StreamingContext tries to create a SparkContext. Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6096 from tdas/SPARK-6752 and squashes the following commits: 53f4b2d [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-6752 f024b77 [Tathagata Das] Removed extra API and used SparkContext.getOrCreate --- .../spark/streaming/StreamingContext.scala | 49 +------------ .../api/java/JavaStreamingContext.scala | 45 ------------ .../apache/spark/streaming/JavaAPISuite.java | 25 +------ .../streaming/StreamingContextSuite.scala | 70 ++----------------- 4 files changed, 9 insertions(+), 180 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 407cab45ed4c6..1d2ecdd341813 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -134,7 +134,7 @@ class StreamingContext private[streaming] ( if (sc_ != null) { sc_ } else if (isCheckpointPresent) { - new SparkContext(cp_.createSparkConf()) + SparkContext.getOrCreate(cp_.createSparkConf()) } else { throw new SparkException("Cannot create StreamingContext without a SparkContext") } @@ -750,53 +750,6 @@ object StreamingContext extends Logging { checkpointOption.map(new StreamingContext(null, _, null)).getOrElse(creatingFunc()) } - /** - * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. - * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be - * recreated from the checkpoint data. 
If the data does not exist, then the StreamingContext - * will be created by called the provided `creatingFunc` on the provided `sparkContext`. Note - * that the SparkConf configuration in the checkpoint data will not be restored as the - * SparkContext has already been created. - * - * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program - * @param creatingFunc Function to create a new StreamingContext using the given SparkContext - * @param sparkContext SparkContext using which the StreamingContext will be created - */ - def getOrCreate( - checkpointPath: String, - creatingFunc: SparkContext => StreamingContext, - sparkContext: SparkContext - ): StreamingContext = { - getOrCreate(checkpointPath, creatingFunc, sparkContext, createOnError = false) - } - - /** - * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. - * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be - * recreated from the checkpoint data. If the data does not exist, then the StreamingContext - * will be created by called the provided `creatingFunc` on the provided `sparkContext`. Note - * that the SparkConf configuration in the checkpoint data will not be restored as the - * SparkContext has already been created. - * - * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program - * @param creatingFunc Function to create a new StreamingContext using the given SparkContext - * @param sparkContext SparkContext using which the StreamingContext will be created - * @param createOnError Whether to create a new StreamingContext if there is an - * error in reading checkpoint data. By default, an exception will be - * thrown on error. - */ - def getOrCreate( - checkpointPath: String, - creatingFunc: SparkContext => StreamingContext, - sparkContext: SparkContext, - createOnError: Boolean - ): StreamingContext = { - val checkpointOption = CheckpointReader.read( - checkpointPath, sparkContext.conf, sparkContext.hadoopConfiguration, createOnError) - checkpointOption.map(new StreamingContext(sparkContext, _, null)) - .getOrElse(creatingFunc(sparkContext)) - } - /** * Find the JAR from which a given class was loaded, to make it easy for users to pass * their JARs to StreamingContext. diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index d8fbed2c50644..b639b94d5ca47 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -804,51 +804,6 @@ object JavaStreamingContext { new JavaStreamingContext(ssc) } - /** - * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. - * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be - * recreated from the checkpoint data. If the data does not exist, then the provided factory - * will be used to create a JavaStreamingContext. 
- * - * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program - * @param creatingFunc Function to create a new JavaStreamingContext - * @param sparkContext SparkContext using which the StreamingContext will be created - */ - def getOrCreate( - checkpointPath: String, - creatingFunc: JFunction[JavaSparkContext, JavaStreamingContext], - sparkContext: JavaSparkContext - ): JavaStreamingContext = { - val ssc = StreamingContext.getOrCreate(checkpointPath, (sparkContext: SparkContext) => { - creatingFunc.call(new JavaSparkContext(sparkContext)).ssc - }, sparkContext.sc) - new JavaStreamingContext(ssc) - } - - /** - * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. - * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be - * recreated from the checkpoint data. If the data does not exist, then the provided factory - * will be used to create a JavaStreamingContext. - * - * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program - * @param creatingFunc Function to create a new JavaStreamingContext - * @param sparkContext SparkContext using which the StreamingContext will be created - * @param createOnError Whether to create a new JavaStreamingContext if there is an - * error in reading checkpoint data. - */ - def getOrCreate( - checkpointPath: String, - creatingFunc: JFunction[JavaSparkContext, JavaStreamingContext], - sparkContext: JavaSparkContext, - createOnError: Boolean - ): JavaStreamingContext = { - val ssc = StreamingContext.getOrCreate(checkpointPath, (sparkContext: SparkContext) => { - creatingFunc.call(new JavaSparkContext(sparkContext)).ssc - }, sparkContext.sc, createOnError) - new JavaStreamingContext(ssc) - } - /** * Find the JAR from which a given class was loaded, to make it easy for users to pass * their JARs to StreamingContext. 
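With the SparkContext-taking overloads removed, recovery against an already-running SparkContext needs no special API: the recreated StreamingContext now obtains its SparkContext via SparkContext.getOrCreate, so the existing two-argument getOrCreate picks up the active context automatically. A minimal sketch of the intended usage (checkpoint directory, app name and batch interval are illustrative):

  import org.apache.spark.{SparkConf, SparkContext}
  import org.apache.spark.streaming.{Seconds, StreamingContext}

  val sc = new SparkContext(new SparkConf().setAppName("app"))   // e.g. managed by a notebook
  def createContext(): StreamingContext = {                      // used only if no checkpoint exists
    val ssc = new StreamingContext(sc, Seconds(1))
    ssc.checkpoint("/tmp/checkpoint")                            // illustrative path
    ssc
  }
  // On recovery, the checkpointed conf is passed to SparkContext.getOrCreate, which returns `sc`
  // instead of constructing a new context (the checkpointed SparkConf is not re-applied).
  val ssc = StreamingContext.getOrCreate("/tmp/checkpoint", createContext _)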
diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java index 2e00b980b9e44..1077b1b2cb7e3 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java @@ -1766,29 +1766,10 @@ public JavaStreamingContext call() { Assert.assertTrue("old context not recovered", !newContextCreated.get()); ssc.stop(); - // Function to create JavaStreamingContext using existing JavaSparkContext - // without any output operations (used to detect the new context) - Function<JavaSparkContext, JavaStreamingContext> creatingFunc2 = - new Function<JavaSparkContext, JavaStreamingContext>() { - public JavaStreamingContext call(JavaSparkContext context) { - newContextCreated.set(true); - return new JavaStreamingContext(context, Seconds.apply(1)); - } - }; - - JavaSparkContext sc = new JavaSparkContext(conf); - newContextCreated.set(false); - ssc = JavaStreamingContext.getOrCreate(emptyDir.getAbsolutePath(), creatingFunc2, sc); - Assert.assertTrue("new context not created", newContextCreated.get()); - ssc.stop(false); - newContextCreated.set(false); - ssc = JavaStreamingContext.getOrCreate(corruptedCheckpointDir, creatingFunc2, sc, true); - Assert.assertTrue("new context not created", newContextCreated.get()); - ssc.stop(false); - - newContextCreated.set(false); - ssc = JavaStreamingContext.getOrCreate(checkpointDir, creatingFunc2, sc); + JavaSparkContext sc = new JavaSparkContext(conf); + ssc = JavaStreamingContext.getOrCreate(checkpointDir, creatingFunc, + new org.apache.hadoop.conf.Configuration()); Assert.assertTrue("old context not recovered", !newContextCreated.get()); ssc.stop(); } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 5f93332896de1..4b12affbb0ddd 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -419,76 +419,16 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w ssc = StreamingContext.getOrCreate(checkpointPath, creatingFunction _) assert(ssc != null, "no context created") assert(!newContextCreated, "old context not recovered") - assert(ssc.conf.get("someKey") === "someValue") - } - } - - test("getOrCreate with existing SparkContext") { - val conf = new SparkConf().setMaster(master).setAppName(appName) - sc = new SparkContext(conf) - - // Function to create StreamingContext that has a config to identify it to be new context - var newContextCreated = false - def creatingFunction(sparkContext: SparkContext): StreamingContext = { - newContextCreated = true - new StreamingContext(sparkContext, batchDuration) - } - - // Call ssc.stop(stopSparkContext = false) after a body of cody - def testGetOrCreate(body: => Unit): Unit = { - newContextCreated = false - try { - body - } finally { - if (ssc != null) { - ssc.stop(stopSparkContext = false) - } - ssc = null - } - } - - val emptyPath = Utils.createTempDir().getAbsolutePath() - - // getOrCreate should create new context with empty path - testGetOrCreate { - ssc = StreamingContext.getOrCreate(emptyPath, creatingFunction _, sc, createOnError = true) - assert(ssc != null, "no context created") - assert(newContextCreated, "new context not created") - assert(ssc.sparkContext === sc, 
"new StreamingContext does not use existing SparkContext") + assert(ssc.conf.get("someKey") === "someValue", "checkpointed config not recovered") } - val corrutedCheckpointPath = createCorruptedCheckpoint() - - // getOrCreate should throw exception with fake checkpoint file and createOnError = false - intercept[Exception] { - ssc = StreamingContext.getOrCreate(corrutedCheckpointPath, creatingFunction _, sc) - } - - // getOrCreate should throw exception with fake checkpoint file - intercept[Exception] { - ssc = StreamingContext.getOrCreate( - corrutedCheckpointPath, creatingFunction _, sc, createOnError = false) - } - - // getOrCreate should create new context with fake checkpoint file and createOnError = true - testGetOrCreate { - ssc = StreamingContext.getOrCreate( - corrutedCheckpointPath, creatingFunction _, sc, createOnError = true) - assert(ssc != null, "no context created") - assert(newContextCreated, "new context not created") - assert(ssc.sparkContext === sc, "new StreamingContext does not use existing SparkContext") - } - - val checkpointPath = createValidCheckpoint() - - // StreamingContext.getOrCreate should recover context with checkpoint path + // getOrCreate should recover StreamingContext with existing SparkContext testGetOrCreate { - ssc = StreamingContext.getOrCreate(checkpointPath, creatingFunction _, sc) + sc = new SparkContext(conf) + ssc = StreamingContext.getOrCreate(checkpointPath, creatingFunction _) assert(ssc != null, "no context created") assert(!newContextCreated, "old context not recovered") - assert(ssc.sparkContext === sc, "new StreamingContext does not use existing SparkContext") - assert(!ssc.conf.contains("someKey"), - "recovered StreamingContext unexpectedly has old config") + assert(!ssc.conf.contains("someKey"), "checkpointed config unexpectedly recovered") } } From 32e27df412706b30daf41f9d46c5572bb9a41bdb Mon Sep 17 00:00:00 2001 From: Patrick Wendell <patrick@databricks.com> Date: Wed, 13 May 2015 17:55:06 -0700 Subject: [PATCH 161/320] [HOTFIX] Bug in merge script --- dev/merge_spark_pr.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index f952c9d0b15e2..1c126f50bf095 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -266,10 +266,9 @@ def get_version_json(version_str): resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0] resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0] - custom_fields = {'resolution': {'id': resolution.raw['id']}} asf_jira.transition_issue( jira_id, resolve["id"], fixVersions = jira_fix_versions, - comment = comment, fields = custom_fields) + comment = comment, resolution = {'id': resolution.raw['id']}) print "Successfully resolved %s with fixVersions=%s!" 
% (jira_id, fix_versions) From 728af88cf6be4c25a732ab7e4fe66c1ed0041164 Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Wed, 13 May 2015 17:58:29 -0700 Subject: [PATCH 162/320] [HOTFIX] Use 'new Job' in fsBasedParquet.scala Same issue as #6095 cc liancheng Author: zsxwing <zsxwing@gmail.com> Closes #6136 from zsxwing/hotfix and squashes the following commits: 4beea54 [zsxwing] Use 'new Job' in fsBasedParquet.scala --- .../scala/org/apache/spark/sql/parquet/fsBasedParquet.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala index d810d6a028c58..c83a9c35dbddf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala @@ -231,7 +231,7 @@ private[sql] class FSBasedParquetRelation( filters: Array[Filter], inputPaths: Array[String]): RDD[Row] = { - val job = Job.getInstance(SparkHadoopUtil.get.conf) + val job = new Job(SparkHadoopUtil.get.conf) val conf = ContextUtil.getConfiguration(job) ParquetInputFormat.setReadSupportClass(job, classOf[RowReadSupport]) From 3113da9c7067bbf90639866ae9d946f02cc484ff Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Wed, 13 May 2015 21:04:13 -0700 Subject: [PATCH 163/320] [HOT FIX #6125] Do not wait for all stages to start rendering zsxwing Author: Andrew Or <andrew@databricks.com> Closes #6138 from andrewor14/dag-viz-clean-properly and squashes the following commits: 19d4e98 [Andrew Or] Add synchronize 02542d6 [Andrew Or] Rename overloaded variable d11bee1 [Andrew Or] Don't wait until all stages have started before rendering --- .../ui/scope/RDDOperationGraphListener.scala | 34 ++++++++++--------- .../RDDOperationGraphListenerSuite.scala | 1 - 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala index 3b77a1e12cc45..aa9c25cb5c8c6 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala @@ -41,11 +41,11 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen conf.getInt("spark.ui.retainedStages", SparkUI.DEFAULT_RETAINED_STAGES) /** Return the graph metadata for the given stage, or None if no such information exists. */ - def getOperationGraphForJob(jobId: Int): Seq[RDDOperationGraph] = { - val stageIds = jobIdToStageIds.get(jobId).getOrElse { Seq.empty } - val graphs = stageIds.flatMap { sid => stageIdToGraph.get(sid) } + def getOperationGraphForJob(jobId: Int): Seq[RDDOperationGraph] = synchronized { + val _stageIds = jobIdToStageIds.get(jobId).getOrElse { Seq.empty } + val graphs = _stageIds.flatMap { sid => stageIdToGraph.get(sid) } // If the metadata for some stages have been removed, do not bother rendering this job - if (stageIds.size != graphs.size) { + if (_stageIds.size != graphs.size) { Seq.empty } else { graphs @@ -53,16 +53,29 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen } /** Return the graph metadata for the given stage, or None if no such information exists. 
*/ - def getOperationGraphForStage(stageId: Int): Option[RDDOperationGraph] = { + def getOperationGraphForStage(stageId: Int): Option[RDDOperationGraph] = synchronized { stageIdToGraph.get(stageId) } /** On job start, construct a RDDOperationGraph for each stage in the job for display later. */ override def onJobStart(jobStart: SparkListenerJobStart): Unit = synchronized { val jobId = jobStart.jobId + val stageInfos = jobStart.stageInfos + jobIds += jobId jobIdToStageIds(jobId) = jobStart.stageInfos.map(_.stageId).sorted + stageInfos.foreach { stageInfo => + stageIds += stageInfo.stageId + stageIdToGraph(stageInfo.stageId) = RDDOperationGraph.makeOperationGraph(stageInfo) + // Remove state for old stages + if (stageIds.size >= retainedStages) { + val toRemove = math.max(retainedStages / 10, 1) + stageIds.take(toRemove).foreach { id => stageIdToGraph.remove(id) } + stageIds.trimStart(toRemove) + } + } + // Remove state for old jobs if (jobIds.size >= retainedJobs) { val toRemove = math.max(retainedJobs / 10, 1) @@ -71,15 +84,4 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen } } - /** Remove graph metadata for old stages */ - override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = synchronized { - val stageInfo = stageSubmitted.stageInfo - stageIds += stageInfo.stageId - stageIdToGraph(stageInfo.stageId) = RDDOperationGraph.makeOperationGraph(stageInfo) - if (stageIds.size >= retainedStages) { - val toRemove = math.max(retainedStages / 10, 1) - stageIds.take(toRemove).foreach { id => stageIdToGraph.remove(id) } - stageIds.trimStart(toRemove) - } - } } diff --git a/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala index 619b38ac02676..c659fc1e8b9a9 100644 --- a/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala @@ -31,7 +31,6 @@ class RDDOperationGraphListenerSuite extends FunSuite { assert(numStages > 0, "I will not run a job with 0 stages for you.") val stageInfos = (0 until numStages).map { _ => val stageInfo = new StageInfo(stageIdCounter, 0, "s", 0, Seq.empty, Seq.empty, "d") - listener.onStageSubmitted(new SparkListenerStageSubmitted(stageInfo)) stageIdCounter += 1 stageInfo } From d5f18de1657bfabf5493011e0b2c7ec29c02c64c Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Wed, 13 May 2015 21:27:17 -0700 Subject: [PATCH 164/320] [SPARK-7612] [MLLIB] update NB training to use mllib's BLAS This is similar to the changes to k-means, which gives us better control on the performance. 
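The heart of the change is that the per-label aggregation in combineByKey now sums term frequencies with mllib's BLAS.axpy on DenseVector instead of converting every sample to a Breeze vector. A simplified sketch of the new aggregation (input validation omitted; BLAS here is mllib's internal org.apache.spark.mllib.linalg.BLAS, so this only compiles inside the mllib package):

  val aggregated = data.map(p => (p.label, p.features)).combineByKey[(Long, DenseVector)](
    createCombiner = (v: Vector) => (1L, v.copy.toDense),           // (sample count, running sum)
    mergeValue = (c: (Long, DenseVector), v: Vector) => {
      BLAS.axpy(1.0, v, c._2)                                       // c._2 += v
      (c._1 + 1L, c._2)
    },
    mergeCombiners = (c1: (Long, DenseVector), c2: (Long, DenseVector)) => {
      BLAS.axpy(1.0, c2._2, c1._2)                                  // c1._2 += c2._2
      (c1._1 + c2._1, c1._2)
    }
  ).collect()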
dbtsai Author: Xiangrui Meng <meng@databricks.com> Closes #6128 from mengxr/SPARK-7612 and squashes the following commits: b5c24c5 [Xiangrui Meng] merge master a90e3ec [Xiangrui Meng] update NB training to use mllib's BLAS --- .../mllib/classification/NaiveBayes.scala | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index b381dc2cb0140..af24ab616663b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -21,15 +21,13 @@ import java.lang.{Iterable => JIterable} import scala.collection.JavaConverters._ -import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum, Axis} +import breeze.linalg.{Axis, DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum} import breeze.numerics.{exp => brzExp, log => brzLog} - import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ -import org.json4s.{DefaultFormats, JValue} import org.apache.spark.{Logging, SparkContext, SparkException} -import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector} +import org.apache.spark.mllib.linalg.{BLAS, DenseVector, SparseVector, Vector} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd.RDD @@ -90,13 +88,13 @@ class NaiveBayesModel private[mllib] ( val brzData = testData.toBreeze modelType match { case "Multinomial" => - labels (brzArgmax (brzPi + brzTheta * brzData) ) + labels(brzArgmax(brzPi + brzTheta * brzData)) case "Bernoulli" => if (!brzData.forall(v => v == 0.0 || v == 1.0)) { throw new SparkException( s"Bernoulli Naive Bayes requires 0 or 1 feature values but found $testData.") } - labels (brzArgmax (brzPi + + labels(brzArgmax(brzPi + (brzTheta - brzNegTheta.get) * brzData + brzNegThetaSum.get)) case _ => // This should never happen. @@ -152,7 +150,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { // Check schema explicitly since erasure makes it hard to use match-case for checking. checkSchema[Data](dataRDD.schema) val dataArray = dataRDD.select("labels", "pi", "theta", "modelType").take(1) - assert(dataArray.size == 1, s"Unable to load NaiveBayesModel data from: ${dataPath(path)}") + assert(dataArray.length == 1, s"Unable to load NaiveBayesModel data from: ${dataPath(path)}") val data = dataArray(0) val labels = data.getAs[Seq[Double]](0).toArray val pi = data.getAs[Seq[Double]](1).toArray @@ -198,7 +196,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { // Check schema explicitly since erasure makes it hard to use match-case for checking. 
checkSchema[Data](dataRDD.schema) val dataArray = dataRDD.select("labels", "pi", "theta").take(1) - assert(dataArray.size == 1, s"Unable to load NaiveBayesModel data from: ${dataPath(path)}") + assert(dataArray.length == 1, s"Unable to load NaiveBayesModel data from: ${dataPath(path)}") val data = dataArray(0) val labels = data.getAs[Seq[Double]](0).toArray val pi = data.getAs[Seq[Double]](1).toArray @@ -288,10 +286,8 @@ class NaiveBayes private ( def run(data: RDD[LabeledPoint]): NaiveBayesModel = { val requireNonnegativeValues: Vector => Unit = (v: Vector) => { val values = v match { - case SparseVector(size, indices, values) => - values - case DenseVector(values) => - values + case sv: SparseVector => sv.values + case dv: DenseVector => dv.values } if (!values.forall(_ >= 0.0)) { throw new SparkException(s"Naive Bayes requires nonnegative feature values but found $v.") @@ -300,10 +296,8 @@ class NaiveBayes private ( val requireZeroOneBernoulliValues: Vector => Unit = (v: Vector) => { val values = v match { - case SparseVector(size, indices, values) => - values - case DenseVector(values) => - values + case sv: SparseVector => sv.values + case dv: DenseVector => dv.values } if (!values.forall(v => v == 0.0 || v == 1.0)) { throw new SparkException( @@ -314,21 +308,24 @@ class NaiveBayes private ( // Aggregates term frequencies per label. // TODO: Calling combineByKey and collect creates two stages, we can implement something // TODO: similar to reduceByKeyLocally to save one stage. - val aggregated = data.map(p => (p.label, p.features)).combineByKey[(Long, BDV[Double])]( + val aggregated = data.map(p => (p.label, p.features)).combineByKey[(Long, DenseVector)]( createCombiner = (v: Vector) => { if (modelType == "Bernoulli") { requireZeroOneBernoulliValues(v) } else { requireNonnegativeValues(v) } - (1L, v.toBreeze.toDenseVector) + (1L, v.copy.toDense) }, - mergeValue = (c: (Long, BDV[Double]), v: Vector) => { + mergeValue = (c: (Long, DenseVector), v: Vector) => { requireNonnegativeValues(v) - (c._1 + 1L, c._2 += v.toBreeze) + BLAS.axpy(1.0, v, c._2) + (c._1 + 1L, c._2) }, - mergeCombiners = (c1: (Long, BDV[Double]), c2: (Long, BDV[Double])) => - (c1._1 + c2._1, c1._2 += c2._2) + mergeCombiners = (c1: (Long, DenseVector), c2: (Long, DenseVector)) => { + BLAS.axpy(1.0, c2._2, c1._2) + (c1._1 + c2._1, c1._2) + } ).collect() val numLabels = aggregated.length @@ -348,7 +345,7 @@ class NaiveBayes private ( labels(i) = label pi(i) = math.log(n + lambda) - piLogDenom val thetaLogDenom = modelType match { - case "Multinomial" => math.log(brzSum(sumTermFreqs) + numFeatures * lambda) + case "Multinomial" => math.log(sumTermFreqs.values.sum + numFeatures * lambda) case "Bernoulli" => math.log(n + 2.0 * lambda) case _ => // This should never happen. 
From d3db2fd66752e80865e9c7a75d8e8d945121697e Mon Sep 17 00:00:00 2001 From: DB Tsai <dbt@netflix.com> Date: Wed, 13 May 2015 22:23:21 -0700 Subject: [PATCH 165/320] [SPARK-7620] [ML] [MLLIB] Removed calling size, length in while condition to avoid extra JVM call Author: DB Tsai <dbt@netflix.com> Closes #6137 from dbtsai/clean and squashes the following commits: 185816d [DB Tsai] fix compilication issue f418d08 [DB Tsai] first commit --- .../classification/LogisticRegression.scala | 9 ++-- .../apache/spark/ml/feature/Bucketizer.scala | 3 +- .../spark/ml/feature/VectorIndexer.scala | 6 ++- .../ml/regression/LinearRegression.scala | 6 ++- .../spark/mllib/feature/ChiSqSelector.scala | 3 +- .../spark/mllib/optimization/Updater.scala | 3 +- .../mllib/regression/IsotonicRegression.scala | 8 ++-- .../stat/MultivariateOnlineSummarizer.scala | 47 +++++++++++-------- .../spark/mllib/stat/test/ChiSqTest.scala | 6 ++- .../spark/mllib/tree/impurity/Impurity.scala | 14 +++--- .../mllib/util/LinearDataGenerator.scala | 3 +- .../spark/ml/feature/BucketizerSuite.scala | 6 ++- .../LogisticRegressionSuite.scala | 3 +- 13 files changed, 73 insertions(+), 44 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 93ba91167bfad..2b103626873a9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -258,7 +258,8 @@ class LogisticRegressionModel private[ml] ( rawPrediction match { case dv: DenseVector => var i = 0 - while (i < dv.size) { + val size = dv.size + while (i < size) { dv.values(i) = 1.0 / (1.0 + math.exp(-dv.values(i))) i += 1 } @@ -357,7 +358,8 @@ private[classification] class MultiClassSummarizer extends Serializable { def histogram: Array[Long] = { val result = Array.ofDim[Long](numClasses) var i = 0 - while (i < result.length) { + val len = result.length + while (i < len) { result(i) = distinctMap.getOrElse(i, 0L) i += 1 } @@ -480,7 +482,8 @@ private class LogisticAggregator( var i = 0 val localThisGradientSumArray = this.gradientSumArray val localOtherGradientSumArray = other.gradientSumArray - while (i < localThisGradientSumArray.length) { + val len = localThisGradientSumArray.length + while (i < len) { localThisGradientSumArray(i) += localOtherGradientSumArray(i) i += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index e52d797293cf3..d8f1961cb380a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -98,7 +98,8 @@ private[feature] object Bucketizer { false } else { var i = 0 - while (i < splits.length - 1) { + val n = splits.length - 1 + while (i < n) { if (splits(i) >= splits(i + 1)) return false i += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 2e6313ac14485..0f83a29c86bf6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -189,7 +189,8 @@ private object VectorIndexer { private def addDenseVector(dv: DenseVector): Unit = { var i = 0 - while (i < dv.size) { + val size = dv.size + while (i < size) { if 
(featureValueSets(i).size <= maxCategories) { featureValueSets(i).add(dv(i)) } @@ -201,7 +202,8 @@ private object VectorIndexer { // TODO: This might be able to handle 0's more efficiently. var vecIndex = 0 // index into vector var k = 0 // index into non-zero elements - while (vecIndex < sv.size) { + val size = sv.size + while (vecIndex < size) { val featureValue = if (k < sv.indices.length && vecIndex == sv.indices(k)) { k += 1 sv.values(k - 1) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 6377923afc0c4..36c242bb5f2a7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -167,7 +167,8 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress val weights = { val rawWeights = state.x.toArray.clone() var i = 0 - while (i < rawWeights.length) { + val len = rawWeights.length + while (i < len) { rawWeights(i) *= { if (featuresStd(i) != 0.0) yStd / featuresStd(i) else 0.0 } i += 1 } @@ -307,7 +308,8 @@ private class LeastSquaresAggregator( val weightsArray = weights.toArray.clone() var sum = 0.0 var i = 0 - while (i < weightsArray.length) { + val len = weightsArray.length + while (i < len) { if (featuresStd(i) != 0.0) { weightsArray(i) /= featuresStd(i) sum += weightsArray(i) * featuresMean(i) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index c6057c7f837b1..9cc2d0ffcab7d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -38,7 +38,8 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf protected def isSorted(array: Array[Int]): Boolean = { var i = 1 - while (i < array.length) { + val len = array.length + while (i < len) { if (array(i) < array(i-1)) return false i += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala index 3ed3a5b9b3843..9f463e0cafb6f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -116,7 +116,8 @@ class L1Updater extends Updater { // Apply proximal operator (soft thresholding) val shrinkageVal = regParam * thisIterStepSize var i = 0 - while (i < brzWeights.length) { + val len = brzWeights.length + while (i < len) { val wi = brzWeights(i) brzWeights(i) = signum(wi) * max(0.0, abs(wi) - shrinkageVal) i += 1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index be2a00c2dfea4..4ce541ae5bed9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -69,7 +69,8 @@ class IsotonicRegressionModel ( /** Asserts the input array is monotone with the given ordering. 
*/ private def assertOrdered(xs: Array[Double])(implicit ord: Ordering[Double]): Unit = { var i = 1 - while (i < xs.length) { + val len = xs.length + while (i < len) { require(ord.compare(xs(i - 1), xs(i)) <= 0, s"Elements (${xs(i - 1)}, ${xs(i)}) are not ordered.") i += 1 @@ -329,11 +330,12 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali } var i = 0 - while (i < input.length) { + val len = input.length + while (i < len) { var j = i // Find monotonicity violating sequence, if any. - while (j < input.length - 1 && input(j)._1 > input(j + 1)._1) { + while (j < len - 1 && input(j)._1 > input(j + 1)._1) { j = j + 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala index fcc2a148791bd..0b1755613aac4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala @@ -70,23 +70,30 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S require(n == sample.size, s"Dimensions mismatch when adding new sample." + s" Expecting $n but got ${sample.size}.") + val localCurrMean= currMean + val localCurrM2n = currM2n + val localCurrM2 = currM2 + val localCurrL1 = currL1 + val localNnz = nnz + val localCurrMax = currMax + val localCurrMin = currMin sample.foreachActive { (index, value) => if (value != 0.0) { - if (currMax(index) < value) { - currMax(index) = value + if (localCurrMax(index) < value) { + localCurrMax(index) = value } - if (currMin(index) > value) { - currMin(index) = value + if (localCurrMin(index) > value) { + localCurrMin(index) = value } - val prevMean = currMean(index) + val prevMean = localCurrMean(index) val diff = value - prevMean - currMean(index) = prevMean + diff / (nnz(index) + 1.0) - currM2n(index) += (value - currMean(index)) * diff - currM2(index) += value * value - currL1(index) += math.abs(value) + localCurrMean(index) = prevMean + diff / (localNnz(index) + 1.0) + localCurrM2n(index) += (value - localCurrMean(index)) * diff + localCurrM2(index) += value * value + localCurrL1(index) += math.abs(value) - nnz(index) += 1.0 + localNnz(index) += 1.0 } } @@ -130,14 +137,14 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S } } else if (totalCnt == 0 && other.totalCnt != 0) { this.n = other.n - this.currMean = other.currMean.clone - this.currM2n = other.currM2n.clone - this.currM2 = other.currM2.clone - this.currL1 = other.currL1.clone + this.currMean = other.currMean.clone() + this.currM2n = other.currM2n.clone() + this.currM2 = other.currM2.clone() + this.currL1 = other.currL1.clone() this.totalCnt = other.totalCnt - this.nnz = other.nnz.clone - this.currMax = other.currMax.clone - this.currMin = other.currMin.clone + this.nnz = other.nnz.clone() + this.currMax = other.currMax.clone() + this.currMin = other.currMin.clone() } this } @@ -165,7 +172,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S if (denominator > 0.0) { val deltaMean = currMean var i = 0 - while (i < currM2n.size) { + val len = currM2n.length + while (i < len) { realVariance(i) = currM2n(i) + deltaMean(i) * deltaMean(i) * nnz(i) * (totalCnt - nnz(i)) / totalCnt realVariance(i) /= denominator @@ -211,7 +219,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S val realMagnitude = 
Array.ofDim[Double](n) var i = 0 - while (i < currM2.size) { + val len = currM2.length + while (i < len) { realMagnitude(i) = math.sqrt(currM2(i)) i += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala index ea82d39b72c03..e597fce2babd1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala @@ -205,8 +205,10 @@ private[stat] object ChiSqTest extends Logging { val colSums = new Array[Double](numCols) val rowSums = new Array[Double](numRows) val colMajorArr = counts.toArray + val colMajorArrLen = colMajorArr.length + var i = 0 - while (i < colMajorArr.size) { + while (i < colMajorArrLen) { val elem = colMajorArr(i) if (elem < 0.0) { throw new IllegalArgumentException("Contingency table cannot contain negative entries.") @@ -220,7 +222,7 @@ private[stat] object ChiSqTest extends Logging { // second pass to collect statistic var statistic = 0.0 var j = 0 - while (j < colMajorArr.size) { + while (j < colMajorArrLen) { val col = j / numRows val colSum = colSums(col) if (colSum == 0.0) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index 60e2ab2bb829e..72eb24c49264a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -111,11 +111,12 @@ private[tree] abstract class ImpurityCalculator(val stats: Array[Double]) { * Add the stats from another calculator into this one, modifying and returning this calculator. */ def add(other: ImpurityCalculator): ImpurityCalculator = { - require(stats.size == other.stats.size, + require(stats.length == other.stats.length, s"Two ImpurityCalculator instances cannot be added with different counts sizes." + - s" Sizes are ${stats.size} and ${other.stats.size}.") + s" Sizes are ${stats.length} and ${other.stats.length}.") var i = 0 - while (i < other.stats.size) { + val len = other.stats.length + while (i < len) { stats(i) += other.stats(i) i += 1 } @@ -127,11 +128,12 @@ private[tree] abstract class ImpurityCalculator(val stats: Array[Double]) { * calculator. */ def subtract(other: ImpurityCalculator): ImpurityCalculator = { - require(stats.size == other.stats.size, + require(stats.length == other.stats.length, s"Two ImpurityCalculator instances cannot be subtracted with different counts sizes." 
+ - s" Sizes are ${stats.size} and ${other.stats.size}.") + s" Sizes are ${stats.length} and ${other.stats.length}.") var i = 0 - while (i < other.stats.size) { + val len = other.stats.length + while (i < len) { stats(i) -= other.stats(i) i += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala index b1a4517344970..b4e33c98ba7e5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala @@ -107,7 +107,8 @@ object LinearDataGenerator { x.foreach { v => var i = 0 - while (i < v.length) { + val len = v.length + while (i < len) { v(i) = (v(i) - 0.5) * math.sqrt(12.0 * xVariance(i)) + xMean(i) i += 1 } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala index 1900820400aee..20d2f3ac6696b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala @@ -122,7 +122,8 @@ private object BucketizerSuite extends FunSuite { def linearSearchForBuckets(splits: Array[Double], feature: Double): Double = { require(feature >= splits.head) var i = 0 - while (i < splits.length - 1) { + val n = splits.length - 1 + while (i < n) { if (feature < splits(i + 1)) return i i += 1 } @@ -138,7 +139,8 @@ private object BucketizerSuite extends FunSuite { s" ${splits.mkString(", ")}") } var i = 0 - while (i < splits.length - 1) { + val n = splits.length - 1 + while (i < n) { // Split i should fall in bucket i. testFeature(splits(i), i) // Value between splits i,i+1 should be in i, which is also true if the (i+1)-th split is inf. diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala index fb0a194718802..966811a5a3263 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala @@ -101,7 +101,8 @@ object LogisticRegressionSuite { // This doesn't work if `vector` is a sparse vector. 
val vectorArray = vector.toArray var i = 0 - while (i < vectorArray.length) { + val len = vectorArray.length + while (i < len) { vectorArray(i) = vectorArray(i) * math.sqrt(xVariance(i)) + xMean(i) i += 1 } From 13e652b61a81b2d2e94088006fbd5fd4ed383e3d Mon Sep 17 00:00:00 2001 From: linweizhong <linweizhong@huawei.com> Date: Thu, 14 May 2015 00:23:27 -0700 Subject: [PATCH 166/320] [SPARK-7595] [SQL] Window will cause resolve failed with self join for example: table: src(key string, value string) sql: with v1 as(select key, count(value) over (partition by key) cnt_val from src), v2 as(select v1.key, v1_lag.cnt_val from v1, v1 v1_lag where v1.key = v1_lag.key) select * from v2 limit 5; then will analyze fail when resolving conflicting references in Join: 'Limit 5 'Project [*] 'Subquery v2 'Project ['v1.key,'v1_lag.cnt_val] 'Filter ('v1.key = 'v1_lag.key) 'Join Inner, None Subquery v1 Project [key#95,cnt_val#94L] Window [key#95,value#96], [HiveWindowFunction#org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount(value#96) WindowSpecDefinition [key#95], [], ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cnt_val#94L], WindowSpecDefinition [key#95], [], ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING Project [key#95,value#96] MetastoreRelation default, src, None Subquery v1_lag Subquery v1 Project [key#97,cnt_val#94L] Window [key#97,value#98], [HiveWindowFunction#org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount(value#98) WindowSpecDefinition [key#97], [], ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cnt_val#94L], WindowSpecDefinition [key#97], [], ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING Project [key#97,value#98] MetastoreRelation default, src, None Conflicting attributes: cnt_val#94L Author: linweizhong <linweizhong@huawei.com> Closes #6114 from Sephiroth-Lin/spark-7595 and squashes the following commits: f8f2637 [linweizhong] Add unit test dfe9169 [linweizhong] Handle windowExpression with self join --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 5 +++++ .../spark/sql/hive/execution/SQLQuerySuite.scala | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a4c61149dd975..4baeeb5b58c2d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -322,6 +322,11 @@ class Analyzer( case oldVersion @ Aggregate(_, aggregateExpressions, _) if findAliases(aggregateExpressions).intersect(conflictingAttributes).nonEmpty => (oldVersion, oldVersion.copy(aggregateExpressions = newAliases(aggregateExpressions))) + + case oldVersion @ Window(_, windowExpressions, _, child) + if AttributeSet(windowExpressions.map(_.toAttribute)).intersect(conflictingAttributes) + .nonEmpty => + (oldVersion, oldVersion.copy(windowExpressions = newAliases(windowExpressions))) }.headOption.getOrElse { // Only handle first case, others will be fixed on the next pass. 
sys.error( s""" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index eaa9d6aad1f31..5c7152e2140db 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -763,4 +763,14 @@ class SQLQuerySuite extends QueryTest { sql("SELECT CASE k WHEN 2 THEN 22 WHEN 4 THEN 44 ELSE 0 END, v FROM t"), Row(0, "1") :: Row(22, "2") :: Row(0, "3") :: Row(44, "4") :: Row(0, "5") :: Nil) } + + test("SPARK-7595: Window will cause resolve failed with self join") { + checkAnswer(sql( + """ + |with + | v1 as (select key, count(value) over (partition by key) cnt_val from src), + | v2 as (select v1.key, v1_lag.cnt_val from v1, v1 v1_lag where v1.key = v1_lag.key) + | select * from v2 order by key limit 1 + """.stripMargin), Row(0, 3)) + } } From 1b8625f4258d6d1a049d0ba60e39e9757f5a568b Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 14 May 2015 01:22:15 -0700 Subject: [PATCH 167/320] [SPARK-7407] [MLLIB] use uid + name to identify parameters A param instance is strongly attached to an parent in the current implementation. So if we make a copy of an estimator or a transformer in pipelines and other meta-algorithms, it becomes error-prone to copy the params to the copied instances. In this PR, a param is identified by its parent's UID and the param name. So it becomes loosely attached to its parent and all its derivatives. The UID is preserved during copying or fitting. All components now have a default constructor and a constructor that takes a UID as input. I keep the constructors for Param in this PR to reduce the amount of diff and moved `parent` as a mutable field. This PR still needs some clean-ups, and there are several spark.ml PRs pending. I'll try to get them merged first and then update this PR. 
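To make this concrete, here is a small stand-alone sketch (illustrative only, not the spark.ml
code; the class name and the uid literal below are made up) of a param keyed by its parent's UID
plus its own name, mirroring the Param changes further down in this patch:

    // Toy model of the new identification scheme: a param holds only its parent's uid string.
    class ToyParam[T](val parent: String, val name: String) extends Serializable {
      override def toString: String = s"${parent}__$name"
      override def hashCode: Int = toString.##
      override def equals(other: Any): Boolean = other match {
        case p: ToyParam[_] => p.parent == parent && p.name == name
        case _ => false
      }
    }

    object ToyParamDemo extends App {
      val uid = "logreg_7a1b2c3d"                            // e.g. Identifiable.randomUID("logreg")
      val maxIterOriginal = new ToyParam[Int](uid, "maxIter")
      val maxIterOnCopy = new ToyParam[Int](uid, "maxIter")  // a copied estimator keeps its uid
      assert(maxIterOriginal == maxIterOnCopy)               // so params still line up after copy()
    }

Because copies and fitted models preserve the UID, a ParamMap entry recorded against the original
estimator still matches the corresponding param on the copy, which is what makes copying params
across pipelines and meta-algorithms safe.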
jkbradley Author: Xiangrui Meng <meng@databricks.com> Closes #6019 from mengxr/SPARK-7407 and squashes the following commits: c4c8120 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-7407 520f0a2 [Xiangrui Meng] address comments 2569168 [Xiangrui Meng] fix tests 873caca [Xiangrui Meng] fix tests in OneVsRest; fix a racing condition in shouldOwn 409ea08 [Xiangrui Meng] minor updates 83a163c [Xiangrui Meng] update JavaDeveloperApiExample 5db5325 [Xiangrui Meng] update OneVsRest 7bde7ae [Xiangrui Meng] merge master 697fdf9 [Xiangrui Meng] update Bucketizer 7b4f6c2 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-7407 629d402 [Xiangrui Meng] fix LRSuite 154516f [Xiangrui Meng] merge master aa4a611 [Xiangrui Meng] fix examples/compile a4794dd [Xiangrui Meng] change Param to use to reduce the size of diff fdbc415 [Xiangrui Meng] all tests passed c255f17 [Xiangrui Meng] fix tests in ParamsSuite 818e1db [Xiangrui Meng] merge master e1160cf [Xiangrui Meng] fix tests fbc39f0 [Xiangrui Meng] pass test:compile 108937e [Xiangrui Meng] pass compile 8726d39 [Xiangrui Meng] use parent uid in Param eaeed35 [Xiangrui Meng] update Identifiable --- .../examples/ml/JavaDeveloperApiExample.java | 43 +++++-- .../examples/ml/DeveloperApiExample.scala | 11 +- .../scala/org/apache/spark/ml/Model.scala | 10 +- .../scala/org/apache/spark/ml/Pipeline.scala | 11 +- .../scala/org/apache/spark/ml/Predictor.scala | 2 +- .../DecisionTreeClassifier.scala | 13 +- .../ml/classification/GBTClassifier.scala | 13 +- .../classification/LogisticRegression.scala | 11 +- .../spark/ml/classification/OneVsRest.scala | 27 ++-- .../RandomForestClassifier.scala | 13 +- .../BinaryClassificationEvaluator.scala | 7 +- .../apache/spark/ml/feature/Binarizer.scala | 7 +- .../apache/spark/ml/feature/Bucketizer.scala | 8 +- .../spark/ml/feature/ElementwiseProduct.scala | 6 +- .../apache/spark/ml/feature/HashingTF.scala | 5 +- .../org/apache/spark/ml/feature/IDF.scala | 10 +- .../apache/spark/ml/feature/Normalizer.scala | 5 +- .../spark/ml/feature/OneHotEncoder.scala | 8 +- .../ml/feature/PolynomialExpansion.scala | 6 +- .../spark/ml/feature/StandardScaler.scala | 10 +- .../spark/ml/feature/StringIndexer.scala | 10 +- .../apache/spark/ml/feature/Tokenizer.scala | 10 +- .../spark/ml/feature/VectorAssembler.scala | 6 +- .../spark/ml/feature/VectorIndexer.scala | 13 +- .../apache/spark/ml/feature/Word2Vec.scala | 10 +- .../org/apache/spark/ml/param/params.scala | 115 ++++++++++++------ .../apache/spark/ml/recommendation/ALS.scala | 10 +- .../ml/regression/DecisionTreeRegressor.scala | 13 +- .../spark/ml/regression/GBTRegressor.scala | 13 +- .../ml/regression/LinearRegression.scala | 17 +-- .../ml/regression/RandomForestRegressor.scala | 12 +- .../spark/ml/tuning/CrossValidator.scala | 10 +- .../apache/spark/ml/util/Identifiable.scala | 21 +++- .../JavaLogisticRegressionSuite.java | 4 +- .../apache/spark/ml/param/JavaTestParams.java | 52 +++++--- .../regression/JavaLinearRegressionSuite.java | 4 +- .../spark/ml/util/IdentifiableSuite.scala | 40 ++++++ .../DecisionTreeClassifierSuite.scala | 2 +- .../classification/GBTClassifierSuite.scala | 2 +- .../LogisticRegressionSuite.scala | 39 +++--- .../ml/classification/OneVsRestSuite.scala | 6 +- .../RandomForestClassifierSuite.scala | 2 +- .../apache/spark/ml/param/ParamsSuite.scala | 16 ++- .../apache/spark/ml/param/TestParams.scala | 5 +- .../DecisionTreeRegressorSuite.scala | 2 +- .../ml/regression/GBTRegressorSuite.scala | 3 +- 
.../RandomForestRegressorSuite.scala | 2 +- 47 files changed, 452 insertions(+), 213 deletions(-) create mode 100644 mllib/src/test/java/org/apache/spark/ml/util/IdentifiableSuite.scala diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java index eac4f898a475d..ec533d174ebdc 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java @@ -28,6 +28,7 @@ import org.apache.spark.ml.classification.ClassificationModel; import org.apache.spark.ml.param.IntParam; import org.apache.spark.ml.param.ParamMap; +import org.apache.spark.ml.util.Identifiable$; import org.apache.spark.mllib.linalg.BLAS; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; @@ -103,7 +104,23 @@ public static void main(String[] args) throws Exception { * However, this should still compile and run successfully. */ class MyJavaLogisticRegression - extends Classifier<Vector, MyJavaLogisticRegression, MyJavaLogisticRegressionModel> { + extends Classifier<Vector, MyJavaLogisticRegression, MyJavaLogisticRegressionModel> { + + public MyJavaLogisticRegression() { + init(); + } + + public MyJavaLogisticRegression(String uid) { + this.uid_ = uid; + init(); + } + + private String uid_ = Identifiable$.MODULE$.randomUID("myJavaLogReg"); + + @Override + public String uid() { + return uid_; + } /** * Param for max number of iterations @@ -117,7 +134,7 @@ class MyJavaLogisticRegression int getMaxIter() { return (Integer) getOrDefault(maxIter); } - public MyJavaLogisticRegression() { + private void init() { setMaxIter(100); } @@ -137,7 +154,7 @@ public MyJavaLogisticRegressionModel train(DataFrame dataset) { Vector weights = Vectors.zeros(numFeatures); // Learning would happen here. // Create a model, and return it. - return new MyJavaLogisticRegressionModel(this, weights); + return new MyJavaLogisticRegressionModel(uid(), weights).setParent(this); } } @@ -149,17 +166,21 @@ public MyJavaLogisticRegressionModel train(DataFrame dataset) { * However, this should still compile and run successfully. 
*/ class MyJavaLogisticRegressionModel - extends ClassificationModel<Vector, MyJavaLogisticRegressionModel> { - - private MyJavaLogisticRegression parent_; - public MyJavaLogisticRegression parent() { return parent_; } + extends ClassificationModel<Vector, MyJavaLogisticRegressionModel> { private Vector weights_; public Vector weights() { return weights_; } - public MyJavaLogisticRegressionModel(MyJavaLogisticRegression parent_, Vector weights_) { - this.parent_ = parent_; - this.weights_ = weights_; + public MyJavaLogisticRegressionModel(String uid, Vector weights) { + this.uid_ = uid; + this.weights_ = weights; + } + + private String uid_ = Identifiable$.MODULE$.randomUID("myJavaLogReg"); + + @Override + public String uid() { + return uid_; } // This uses the default implementation of transform(), which reads column "features" and outputs @@ -204,6 +225,6 @@ public Vector predictRaw(Vector features) { */ @Override public MyJavaLogisticRegressionModel copy(ParamMap extra) { - return copyValues(new MyJavaLogisticRegressionModel(parent_, weights_), extra); + return copyValues(new MyJavaLogisticRegressionModel(uid(), weights_), extra); } } diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala index 2a2d0677272a0..3ee456edbe01e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala @@ -20,6 +20,7 @@ package org.apache.spark.examples.ml import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.ml.classification.{ClassificationModel, Classifier, ClassifierParams} import org.apache.spark.ml.param.{IntParam, ParamMap} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.sql.{DataFrame, Row, SQLContext} @@ -106,10 +107,12 @@ private trait MyLogisticRegressionParams extends ClassifierParams { * * NOTE: This is private since it is an example. In practice, you may not want it to be private. */ -private class MyLogisticRegression +private class MyLogisticRegression(override val uid: String) extends Classifier[Vector, MyLogisticRegression, MyLogisticRegressionModel] with MyLogisticRegressionParams { + def this() = this(Identifiable.randomUID("myLogReg")) + setMaxIter(100) // Initialize // The parameter setter is in this class since it should return type MyLogisticRegression. @@ -125,7 +128,7 @@ private class MyLogisticRegression val weights = Vectors.zeros(numFeatures) // Learning would happen here. // Create a model, and return it. - new MyLogisticRegressionModel(this, weights) + new MyLogisticRegressionModel(uid, weights).setParent(this) } } @@ -135,7 +138,7 @@ private class MyLogisticRegression * NOTE: This is private since it is an example. In practice, you may not want it to be private. */ private class MyLogisticRegressionModel( - override val parent: MyLogisticRegression, + override val uid: String, val weights: Vector) extends ClassificationModel[Vector, MyLogisticRegressionModel] with MyLogisticRegressionParams { @@ -173,6 +176,6 @@ private class MyLogisticRegressionModel( * This is used for the default implementation of [[transform()]]. 
*/ override def copy(extra: ParamMap): MyLogisticRegressionModel = { - copyValues(new MyLogisticRegressionModel(parent, weights), extra) + copyValues(new MyLogisticRegressionModel(uid, weights), extra) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/Model.scala b/mllib/src/main/scala/org/apache/spark/ml/Model.scala index 9974efe7b1d25..7fd515369b19b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Model.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Model.scala @@ -32,7 +32,15 @@ abstract class Model[M <: Model[M]] extends Transformer { * The parent estimator that produced this model. * Note: For ensembles' component Models, this value can be null. */ - val parent: Estimator[M] + var parent: Estimator[M] = _ + + /** + * Sets the parent of this model (Java API). + */ + def setParent(parent: Estimator[M]): M = { + this.parent = parent + this.asInstanceOf[M] + } override def copy(extra: ParamMap): M = { // The default implementation of Params.copy doesn't work for models. diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 33d430f5671ee..fac54188f9f4e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -22,6 +22,7 @@ import scala.collection.mutable.ListBuffer import org.apache.spark.Logging import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} import org.apache.spark.ml.param.{Param, ParamMap, Params} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.StructType @@ -80,7 +81,9 @@ abstract class PipelineStage extends Params with Logging { * an identity transformer. */ @AlphaComponent -class Pipeline extends Estimator[PipelineModel] { +class Pipeline(override val uid: String) extends Estimator[PipelineModel] { + + def this() = this(Identifiable.randomUID("pipeline")) /** * param for pipeline stages @@ -148,7 +151,7 @@ class Pipeline extends Estimator[PipelineModel] { } } - new PipelineModel(this, transformers.toArray) + new PipelineModel(uid, transformers.toArray).setParent(this) } override def copy(extra: ParamMap): Pipeline = { @@ -171,7 +174,7 @@ class Pipeline extends Estimator[PipelineModel] { */ @AlphaComponent class PipelineModel private[ml] ( - override val parent: Pipeline, + override val uid: String, val stages: Array[Transformer]) extends Model[PipelineModel] with Logging { @@ -190,6 +193,6 @@ class PipelineModel private[ml] ( } override def copy(extra: ParamMap): PipelineModel = { - new PipelineModel(parent, stages) + new PipelineModel(uid, stages) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala index f6a5f27425d1f..ec0f76aa668bd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala @@ -88,7 +88,7 @@ abstract class Predictor[ // This handles a few items such as schema validation. // Developers only need to implement train(). 
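    // Note on the change just below: train() now constructs the model with the estimator's uid,
    // and setParent(this) records the estimator on the returned model, so model.parent still
    // works even though models are no longer constructed with a parent reference.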
transformSchema(dataset.schema, logging = true) - copyValues(train(dataset)) + copyValues(train(dataset).setParent(this)) } override def copy(extra: ParamMap): Learner = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index dcebea1d4b015..7c961332bf5b6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -21,7 +21,7 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tree.{TreeClassifierParams, DecisionTreeParams, DecisionTreeModel, Node} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{DecisionTree => OldDecisionTree} @@ -39,10 +39,12 @@ import org.apache.spark.sql.DataFrame * features. */ @AlphaComponent -final class DecisionTreeClassifier +final class DecisionTreeClassifier(override val uid: String) extends Predictor[Vector, DecisionTreeClassifier, DecisionTreeClassificationModel] with DecisionTreeParams with TreeClassifierParams { + def this() = this(Identifiable.randomUID("dtc")) + // Override parameter setters from parent trait for Java API compatibility. override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value) @@ -101,7 +103,7 @@ object DecisionTreeClassifier { */ @AlphaComponent final class DecisionTreeClassificationModel private[ml] ( - override val parent: DecisionTreeClassifier, + override val uid: String, override val rootNode: Node) extends PredictionModel[Vector, DecisionTreeClassificationModel] with DecisionTreeModel with Serializable { @@ -114,7 +116,7 @@ final class DecisionTreeClassificationModel private[ml] ( } override def copy(extra: ParamMap): DecisionTreeClassificationModel = { - copyValues(new DecisionTreeClassificationModel(parent, rootNode), extra) + copyValues(new DecisionTreeClassificationModel(uid, rootNode), extra) } override def toString: String = { @@ -138,6 +140,7 @@ private[ml] object DecisionTreeClassificationModel { s"Cannot convert non-classification DecisionTreeModel (old API) to" + s" DecisionTreeClassificationModel (new API). 
Algo is: ${oldModel.algo}") val rootNode = Node.fromOld(oldModel.topNode, categoricalFeatures) - new DecisionTreeClassificationModel(parent, rootNode) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("dtc") + new DecisionTreeClassificationModel(uid, rootNode) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index ae51b05a0c42d..d504d84beb91e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -25,7 +25,7 @@ import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.{Param, ParamMap} import org.apache.spark.ml.regression.DecisionTreeRegressionModel import org.apache.spark.ml.tree.{GBTParams, TreeClassifierParams, DecisionTreeModel, TreeEnsembleModel} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{GradientBoostedTrees => OldGBT} @@ -44,10 +44,12 @@ import org.apache.spark.sql.DataFrame * Note: Multiclass labels are not currently supported. */ @AlphaComponent -final class GBTClassifier +final class GBTClassifier(override val uid: String) extends Predictor[Vector, GBTClassifier, GBTClassificationModel] with GBTParams with TreeClassifierParams with Logging { + def this() = this(Identifiable.randomUID("gbtc")) + // Override parameter setters from parent trait for Java API compatibility. // Parameters from TreeClassifierParams: @@ -160,7 +162,7 @@ object GBTClassifier { */ @AlphaComponent final class GBTClassificationModel( - override val parent: GBTClassifier, + override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], private val _treeWeights: Array[Double]) extends PredictionModel[Vector, GBTClassificationModel] @@ -184,7 +186,7 @@ final class GBTClassificationModel( } override def copy(extra: ParamMap): GBTClassificationModel = { - copyValues(new GBTClassificationModel(parent, _trees, _treeWeights), extra) + copyValues(new GBTClassificationModel(uid, _trees, _treeWeights), extra) } override def toString: String = { @@ -210,6 +212,7 @@ private[ml] object GBTClassificationModel { // parent, fittingParamMap for each tree is null since there are no good ways to set these. 
DecisionTreeRegressionModel.fromOld(tree, null, categoricalFeatures) } - new GBTClassificationModel(parent, newTrees, oldModel.treeWeights) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("gbtc") + new GBTClassificationModel(parent.uid, newTrees, oldModel.treeWeights) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 2b103626873a9..8694c96e4c5b6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -26,6 +26,7 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction} import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.linalg.BLAS._ import org.apache.spark.mllib.regression.LabeledPoint @@ -50,10 +51,12 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas * Currently, this class only supports binary classification. */ @AlphaComponent -class LogisticRegression +class LogisticRegression(override val uid: String) extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel] with LogisticRegressionParams with Logging { + def this() = this(Identifiable.randomUID("logreg")) + /** * Set the regularization parameter. * Default is 0.0. @@ -213,7 +216,7 @@ class LogisticRegression (weightsWithIntercept, 0.0) } - new LogisticRegressionModel(this, weights.compressed, intercept) + new LogisticRegressionModel(uid, weights.compressed, intercept) } } @@ -224,7 +227,7 @@ class LogisticRegression */ @AlphaComponent class LogisticRegressionModel private[ml] ( - override val parent: LogisticRegression, + override val uid: String, val weights: Vector, val intercept: Double) extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel] @@ -276,7 +279,7 @@ class LogisticRegressionModel private[ml] ( } override def copy(extra: ParamMap): LogisticRegressionModel = { - copyValues(new LogisticRegressionModel(parent, weights, intercept), extra) + copyValues(new LogisticRegressionModel(uid, weights, intercept), extra) } override protected def raw2prediction(rawPrediction: Vector): Double = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index afb8d75d57384..1543f051ccd17 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -25,7 +25,7 @@ import org.apache.spark.annotation.{AlphaComponent, Experimental} import org.apache.spark.ml._ import org.apache.spark.ml.attribute._ import org.apache.spark.ml.param.Param -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ @@ -40,19 +40,17 @@ private[ml] trait OneVsRestParams extends PredictorParams { type ClassifierType = Classifier[F, E, M] forSome { type F type M <: ClassificationModel[F, M] - type E <: Classifier[F, E, M] + type E <: Classifier[F, E, M] } /** * param for the base binary classifier that we reduce multiclass classification 
into. * @group param */ - val classifier: Param[ClassifierType] = - new Param(this, "classifier", "base binary classifier ") + val classifier: Param[ClassifierType] = new Param(this, "classifier", "base binary classifier") /** @group getParam */ def getClassifier: ClassifierType = $(classifier) - } /** @@ -70,10 +68,10 @@ private[ml] trait OneVsRestParams extends PredictorParams { * (taking label 0). */ @AlphaComponent -class OneVsRestModel private[ml] ( - override val parent: OneVsRest, - labelMetadata: Metadata, - val models: Array[_ <: ClassificationModel[_,_]]) +final class OneVsRestModel private[ml] ( + override val uid: String, + labelMetadata: Metadata, + val models: Array[_ <: ClassificationModel[_,_]]) extends Model[OneVsRestModel] with OneVsRestParams { override def transformSchema(schema: StructType): StructType = { @@ -145,11 +143,13 @@ class OneVsRestModel private[ml] ( * is picked to label the example. */ @Experimental -final class OneVsRest extends Estimator[OneVsRestModel] with OneVsRestParams { +final class OneVsRest(override val uid: String) + extends Estimator[OneVsRestModel] with OneVsRestParams { + + def this() = this(Identifiable.randomUID("oneVsRest")) /** @group setParam */ - def setClassifier(value: Classifier[_,_,_]): this.type = { - // TODO: Find a better way to do this. Existential Types don't work with Java API so cast needed + def setClassifier(value: Classifier[_, _, _]): this.type = { set(classifier, value.asInstanceOf[ClassifierType]) } @@ -204,6 +204,7 @@ final class OneVsRest extends Estimator[OneVsRestModel] with OneVsRestParams { NominalAttribute.defaultAttr.withName("label").withNumValues(numClasses) case attr: Attribute => attr } - copyValues(new OneVsRestModel(this, labelAttribute.toMetadata(), models)) + val model = new OneVsRestModel(uid, labelAttribute.toMetadata(), models).setParent(this) + copyValues(model) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala index 9954893f14359..a1de7919859eb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala @@ -23,7 +23,7 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tree.{RandomForestParams, TreeClassifierParams, DecisionTreeModel, TreeEnsembleModel} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{RandomForest => OldRandomForest} @@ -41,10 +41,12 @@ import org.apache.spark.sql.DataFrame * features. */ @AlphaComponent -final class RandomForestClassifier +final class RandomForestClassifier(override val uid: String) extends Predictor[Vector, RandomForestClassifier, RandomForestClassificationModel] with RandomForestParams with TreeClassifierParams { + def this() = this(Identifiable.randomUID("rfc")) + // Override parameter setters from parent trait for Java API compatibility. 
// Parameters from TreeClassifierParams: @@ -118,7 +120,7 @@ object RandomForestClassifier { */ @AlphaComponent final class RandomForestClassificationModel private[ml] ( - override val parent: RandomForestClassifier, + override val uid: String, private val _trees: Array[DecisionTreeClassificationModel]) extends PredictionModel[Vector, RandomForestClassificationModel] with TreeEnsembleModel with Serializable { @@ -146,7 +148,7 @@ final class RandomForestClassificationModel private[ml] ( } override def copy(extra: ParamMap): RandomForestClassificationModel = { - copyValues(new RandomForestClassificationModel(parent, _trees), extra) + copyValues(new RandomForestClassificationModel(uid, _trees), extra) } override def toString: String = { @@ -172,6 +174,7 @@ private[ml] object RandomForestClassificationModel { // parent, fittingParamMap for each tree is null since there are no good ways to set these. DecisionTreeClassificationModel.fromOld(tree, null, categoricalFeatures) } - new RandomForestClassificationModel(parent, newTrees) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("rfc") + new RandomForestClassificationModel(uid, newTrees) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala index e5a73c6087a11..c1af09c9694ba 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala @@ -21,7 +21,7 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.Evaluator import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.{DataFrame, Row} @@ -33,7 +33,10 @@ import org.apache.spark.sql.types.DoubleType * Evaluator for binary classification, which expects two input columns: score and label. */ @AlphaComponent -class BinaryClassificationEvaluator extends Evaluator with HasRawPredictionCol with HasLabelCol { +class BinaryClassificationEvaluator(override val uid: String) + extends Evaluator with HasRawPredictionCol with HasLabelCol { + + def this() = this(Identifiable.randomUID("binEval")) /** * param for metric name in evaluation diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index 6eb1db6971111..62f4a6343423e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -22,7 +22,7 @@ import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.BinaryAttribute import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} -import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, StructType} @@ -32,7 +32,10 @@ import org.apache.spark.sql.types.{DoubleType, StructType} * Binarize a column of continuous features given a threshold. 
*/ @AlphaComponent -final class Binarizer extends Transformer with HasInputCol with HasOutputCol { +final class Binarizer(override val uid: String) + extends Transformer with HasInputCol with HasOutputCol { + + def this() = this(Identifiable.randomUID("binarizer")) /** * Param for threshold used to binarize continuous features. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index d8f1961cb380a..ac8dfb5632a7b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -21,11 +21,11 @@ import java.{util => ju} import org.apache.spark.SparkException import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.ml.Model import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} -import org.apache.spark.ml.util.SchemaUtils -import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} @@ -35,10 +35,10 @@ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} * `Bucketizer` maps a column of continuous features to a column of feature buckets. */ @AlphaComponent -final class Bucketizer private[ml] (override val parent: Estimator[Bucketizer]) +final class Bucketizer(override val uid: String) extends Model[Bucketizer] with HasInputCol with HasOutputCol { - def this() = this(null) + def this() = this(Identifiable.randomUID("bucketizer")) /** * Parameter for mapping continuous features into buckets. With n+1 splits, there are n buckets. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index f8b56293e3ccc..8b32eee0e490a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.Param +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.types.DataType @@ -31,7 +32,10 @@ import org.apache.spark.sql.types.DataType * multiplier. 
*/ @AlphaComponent -class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { +class ElementwiseProduct(override val uid: String) + extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { + + def this() = this(Identifiable.randomUID("elemProd")) /** * the vector to multiply with input vectors diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index c305a819a8966..30033ced68a04 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.{IntParam, ParamValidators} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.types.DataType @@ -29,7 +30,9 @@ import org.apache.spark.sql.types.DataType * Maps a sequence of terms to their term frequencies using the hashing trick. */ @AlphaComponent -class HashingTF extends UnaryTransformer[Iterable[_], Vector, HashingTF] { +class HashingTF(override val uid: String) extends UnaryTransformer[Iterable[_], Vector, HashingTF] { + + def this() = this(Identifiable.randomUID("hashingTF")) /** * Number of features. Should be > 0. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index d901a20aed002..788c392050c2d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -21,7 +21,7 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql._ @@ -62,7 +62,9 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol * Compute the Inverse Document Frequency (IDF) given a collection of documents. 
*/ @AlphaComponent -final class IDF extends Estimator[IDFModel] with IDFBase { +final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBase { + + def this() = this(Identifiable.randomUID("idf")) /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -74,7 +76,7 @@ final class IDF extends Estimator[IDFModel] with IDFBase { transformSchema(dataset.schema, logging = true) val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v } val idf = new feature.IDF($(minDocFreq)).fit(input) - copyValues(new IDFModel(this, idf)) + copyValues(new IDFModel(uid, idf).setParent(this)) } override def transformSchema(schema: StructType): StructType = { @@ -88,7 +90,7 @@ final class IDF extends Estimator[IDFModel] with IDFBase { */ @AlphaComponent class IDFModel private[ml] ( - override val parent: IDF, + override val uid: String, idfModel: feature.IDFModel) extends Model[IDFModel] with IDFBase { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index 755b46a64c7f1..3f689d1585cd6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.{DoubleParam, ParamValidators} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.types.DataType @@ -29,7 +30,9 @@ import org.apache.spark.sql.types.DataType * Normalize a vector to have unit norm using the given p-norm. */ @AlphaComponent -class Normalizer extends UnaryTransformer[Vector, Vector, Normalizer] { +class Normalizer(override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] { + + def this() = this(Identifiable.randomUID("normalizer")) /** * Normalization in L^p^ space. Must be >= 1. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 46514ae5f0e84..1fb9b9ae75091 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -24,7 +24,7 @@ import org.apache.spark.ml.attribute.{Attribute, BinaryAttribute, NominalAttribu import org.apache.spark.mllib.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} -import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.sql.types.{DataType, DoubleType, StructType} /** @@ -37,8 +37,10 @@ import org.apache.spark.sql.types.{DataType, DoubleType, StructType} * linearly dependent because they sum up to one. */ @AlphaComponent -class OneHotEncoder extends UnaryTransformer[Double, Vector, OneHotEncoder] - with HasInputCol with HasOutputCol { +class OneHotEncoder(override val uid: String) + extends UnaryTransformer[Double, Vector, OneHotEncoder] with HasInputCol with HasOutputCol { + + def this() = this(Identifiable.randomUID("oneHot")) /** * Whether to include a component in the encoded vectors for the first category, defaults to true. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 9e6177ca27e4a..41564410e4965 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -22,6 +22,7 @@ import scala.collection.mutable import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.{IntParam, ParamValidators} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg._ import org.apache.spark.sql.types.DataType @@ -34,7 +35,10 @@ import org.apache.spark.sql.types.DataType * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. */ @AlphaComponent -class PolynomialExpansion extends UnaryTransformer[Vector, Vector, PolynomialExpansion] { +class PolynomialExpansion(override val uid: String) + extends UnaryTransformer[Vector, Vector, PolynomialExpansion] { + + def this() = this(Identifiable.randomUID("poly")) /** * The polynomial degree to expand, which should be >= 1. A value of 1 means no expansion. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 7cad59ff3fa37..5ccda15d872ed 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -21,6 +21,7 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql._ @@ -55,7 +56,10 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with * statistics on the samples in the training set. 
*/ @AlphaComponent -class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerParams { +class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel] + with StandardScalerParams { + + def this() = this(Identifiable.randomUID("stdScal")) setDefault(withMean -> false, withStd -> true) @@ -76,7 +80,7 @@ class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerP val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v } val scaler = new feature.StandardScaler(withMean = $(withMean), withStd = $(withStd)) val scalerModel = scaler.fit(input) - copyValues(new StandardScalerModel(this, scalerModel)) + copyValues(new StandardScalerModel(uid, scalerModel).setParent(this)) } override def transformSchema(schema: StructType): StructType = { @@ -96,7 +100,7 @@ class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerP */ @AlphaComponent class StandardScalerModel private[ml] ( - override val parent: StandardScaler, + override val uid: String, scaler: feature.StandardScalerModel) extends Model[StandardScalerModel] with StandardScalerParams { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 3d78537ad84cb..3f79b67309f07 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -23,6 +23,7 @@ import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{NumericType, StringType, StructType} @@ -58,7 +59,10 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha * So the most frequent label gets index 0. 
*/ @AlphaComponent -class StringIndexer extends Estimator[StringIndexerModel] with StringIndexerBase { +class StringIndexer(override val uid: String) extends Estimator[StringIndexerModel] + with StringIndexerBase { + + def this() = this(Identifiable.randomUID("strIdx")) /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -73,7 +77,7 @@ class StringIndexer extends Estimator[StringIndexerModel] with StringIndexerBase .map(_.getString(0)) .countByValue() val labels = counts.toSeq.sortBy(-_._2).map(_._1).toArray - copyValues(new StringIndexerModel(this, labels)) + copyValues(new StringIndexerModel(uid, labels).setParent(this)) } override def transformSchema(schema: StructType): StructType = { @@ -87,7 +91,7 @@ class StringIndexer extends Estimator[StringIndexerModel] with StringIndexerBase */ @AlphaComponent class StringIndexerModel private[ml] ( - override val parent: StringIndexer, + override val uid: String, labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase { private val labelToIndex: OpenHashMap[String, Double] = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 649c217b16590..36d9e17eca41b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.types.{ArrayType, DataType, StringType} /** @@ -27,7 +28,9 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} * A tokenizer that converts the input string to lowercase and then splits it by white spaces. */ @AlphaComponent -class Tokenizer extends UnaryTransformer[String, Seq[String], Tokenizer] { +class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] { + + def this() = this(Identifiable.randomUID("tok")) override protected def createTransformFunc: String => Seq[String] = { _.toLowerCase.split("\\s") @@ -48,7 +51,10 @@ class Tokenizer extends UnaryTransformer[String, Seq[String], Tokenizer] { * It returns an array of strings that can be empty. */ @AlphaComponent -class RegexTokenizer extends UnaryTransformer[String, Seq[String], RegexTokenizer] { +class RegexTokenizer(override val uid: String) + extends UnaryTransformer[String, Seq[String], RegexTokenizer] { + + def this() = this(Identifiable.randomUID("regexTok")) /** * Minimum token length, >= 0. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 796758a70ef18..1c0009476908c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkException import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ @@ -33,7 +34,10 @@ import org.apache.spark.sql.types._ * A feature transformer that merges multiple columns into a vector column. */ @AlphaComponent -class VectorAssembler extends Transformer with HasInputCols with HasOutputCol { +class VectorAssembler(override val uid: String) + extends Transformer with HasInputCols with HasOutputCol { + + def this() = this(Identifiable.randomUID("va")) /** @group setParam */ def setInputCols(value: Array[String]): this.type = set(inputCols, value) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 0f83a29c86bf6..6d1d0524e59ee 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -22,7 +22,7 @@ import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.attribute._ import org.apache.spark.ml.param.{IntParam, ParamValidators, Params} import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, VectorUDT} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.callUDF @@ -87,7 +87,10 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu * - Add option for allowing unknown categories. 
*/ @AlphaComponent -class VectorIndexer extends Estimator[VectorIndexerModel] with VectorIndexerParams { +class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerModel] + with VectorIndexerParams { + + def this() = this(Identifiable.randomUID("vecIdx")) /** @group setParam */ def setMaxCategories(value: Int): this.type = set(maxCategories, value) @@ -110,7 +113,9 @@ class VectorIndexer extends Estimator[VectorIndexerModel] with VectorIndexerPara iter.foreach(localCatStats.addVector) Iterator(localCatStats) }.reduce((stats1, stats2) => stats1.merge(stats2)) - copyValues(new VectorIndexerModel(this, numFeatures, categoryStats.getCategoryMaps)) + val model = new VectorIndexerModel(uid, numFeatures, categoryStats.getCategoryMaps) + .setParent(this) + copyValues(model) } override def transformSchema(schema: StructType): StructType = { @@ -238,7 +243,7 @@ private object VectorIndexer { */ @AlphaComponent class VectorIndexerModel private[ml] ( - override val parent: VectorIndexer, + override val uid: String, val numFeatures: Int, val categoryMaps: Map[Int, Map[Double, Int]]) extends Model[VectorIndexerModel] with VectorIndexerParams { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 34ff92970129f..8ace8c53bb663 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -21,7 +21,7 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{VectorUDT, Vectors} import org.apache.spark.mllib.linalg.BLAS._ @@ -85,7 +85,9 @@ private[feature] trait Word2VecBase extends Params * natural language processing or machine learning process. 
*/ @AlphaComponent -final class Word2Vec extends Estimator[Word2VecModel] with Word2VecBase { +final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase { + + def this() = this(Identifiable.randomUID("w2v")) /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -122,7 +124,7 @@ final class Word2Vec extends Estimator[Word2VecModel] with Word2VecBase { .setSeed($(seed)) .setVectorSize($(vectorSize)) .fit(input) - copyValues(new Word2VecModel(this, wordVectors)) + copyValues(new Word2VecModel(uid, wordVectors).setParent(this)) } override def transformSchema(schema: StructType): StructType = { @@ -136,7 +138,7 @@ final class Word2Vec extends Estimator[Word2VecModel] with Word2VecBase { */ @AlphaComponent class Word2VecModel private[ml] ( - override val parent: Word2Vec, + override val uid: String, wordVectors: feature.Word2VecModel) extends Model[Word2VecModel] with Word2VecBase { diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 5a7ec29aac6cc..247e08be1bb15 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -40,12 +40,17 @@ import org.apache.spark.ml.util.Identifiable * @tparam T param value type */ @AlphaComponent -class Param[T] (val parent: Params, val name: String, val doc: String, val isValid: T => Boolean) +class Param[T](val parent: String, val name: String, val doc: String, val isValid: T => Boolean) extends Serializable { - def this(parent: Params, name: String, doc: String) = + def this(parent: Identifiable, name: String, doc: String, isValid: T => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue[T]) + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + /** * Assert that the given value is valid for this parameter. * @@ -60,8 +65,7 @@ class Param[T] (val parent: Params, val name: String, val doc: String, val isVal */ private[param] def validate(value: T): Unit = { if (!isValid(value)) { - throw new IllegalArgumentException(s"$parent parameter $name given invalid value $value." + - s" Parameter description: $toString") + throw new IllegalArgumentException(s"$parent parameter $name given invalid value $value.") } } @@ -75,19 +79,15 @@ class Param[T] (val parent: Params, val name: String, val doc: String, val isVal */ def ->(value: T): ParamPair[T] = ParamPair(this, value) - /** - * Converts this param's name, doc, and optionally its default value and the user-supplied - * value in its parent to string. - */ - override def toString: String = { - val valueStr = if (parent.isDefined(this)) { - val defaultValueStr = parent.getDefault(this).map("default: " + _) - val currentValueStr = parent.get(this).map("current: " + _) - (defaultValueStr ++ currentValueStr).mkString("(", ", ", ")") - } else { - "(undefined)" + override final def toString: String = s"${parent}__$name" + + override final def hashCode: Int = toString.## + + override final def equals(obj: Any): Boolean = { + obj match { + case p: Param[_] => (p.parent == parent) && (p.name == name) + case _ => false } - s"$name: $doc $valueStr" } } @@ -173,49 +173,71 @@ object ParamValidators { // specialize primitive-typed params because Java doesn't recognize scala.Double, scala.Int, ... 
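// Pattern shared by the specialized params below: the primary constructor takes the parent uid as
// a String, and auxiliary constructors accept an Identifiable and forward parent.uid, so existing
// Scala and Java callers can keep passing `this`.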
/** Specialized version of [[Param[Double]]] for Java. */ -class DoubleParam(parent: Params, name: String, doc: String, isValid: Double => Boolean) +class DoubleParam(parent: String, name: String, doc: String, isValid: Double => Boolean) extends Param[Double](parent, name, doc, isValid) { - def this(parent: Params, name: String, doc: String) = + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) + def this(parent: Identifiable, name: String, doc: String, isValid: Double => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Double): ParamPair[Double] = super.w(value) } /** Specialized version of [[Param[Int]]] for Java. */ -class IntParam(parent: Params, name: String, doc: String, isValid: Int => Boolean) +class IntParam(parent: String, name: String, doc: String, isValid: Int => Boolean) extends Param[Int](parent, name, doc, isValid) { - def this(parent: Params, name: String, doc: String) = + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) + def this(parent: Identifiable, name: String, doc: String, isValid: Int => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Int): ParamPair[Int] = super.w(value) } /** Specialized version of [[Param[Float]]] for Java. */ -class FloatParam(parent: Params, name: String, doc: String, isValid: Float => Boolean) +class FloatParam(parent: String, name: String, doc: String, isValid: Float => Boolean) extends Param[Float](parent, name, doc, isValid) { - def this(parent: Params, name: String, doc: String) = + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) + def this(parent: Identifiable, name: String, doc: String, isValid: Float => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Float): ParamPair[Float] = super.w(value) } /** Specialized version of [[Param[Long]]] for Java. */ -class LongParam(parent: Params, name: String, doc: String, isValid: Long => Boolean) +class LongParam(parent: String, name: String, doc: String, isValid: Long => Boolean) extends Param[Long](parent, name, doc, isValid) { - def this(parent: Params, name: String, doc: String) = + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) + def this(parent: Identifiable, name: String, doc: String, isValid: Long => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Long): ParamPair[Long] = super.w(value) } /** Specialized version of [[Param[Boolean]]] for Java. */ -class BooleanParam(parent: Params, name: String, doc: String) // No need for isValid +class BooleanParam(parent: String, name: String, doc: String) // No need for isValid extends Param[Boolean](parent, name, doc) { + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Boolean): ParamPair[Boolean] = super.w(value) } @@ -265,6 +287,9 @@ trait Params extends Identifiable with Serializable { /** * Returns all params sorted by their names. 
The default implementation uses Java reflection to * list all public methods that have no arguments and return [[Param]]. + * + * Note: Developer should not use this method in constructor because we cannot guarantee that + * this variable gets initialized before other params. */ lazy val params: Array[Param[_]] = { val methods = this.getClass.getMethods @@ -299,15 +324,36 @@ trait Params extends Identifiable with Serializable { * those are checked during schema validation. */ def validateParams(): Unit = { - params.filter(isDefined _).foreach { param => + params.filter(isDefined).foreach { param => param.asInstanceOf[Param[Any]].validate($(param)) } } /** - * Returns the documentation of all params. + * Explains a param. + * @param param input param, must belong to this instance. + * @return a string that contains the input param name, doc, and optionally its default value and + * the user-supplied value + */ + def explainParam(param: Param[_]): String = { + shouldOwn(param) + val valueStr = if (isDefined(param)) { + val defaultValueStr = getDefault(param).map("default: " + _) + val currentValueStr = get(param).map("current: " + _) + (defaultValueStr ++ currentValueStr).mkString("(", ", ", ")") + } else { + "(undefined)" + } + s"${param.name}: ${param.doc} $valueStr" + } + + /** + * Explains all params of this instance. + * @see [[explainParam()]] */ - def explainParams(): String = params.mkString("\n") + def explainParams(): String = { + params.map(explainParam).mkString("\n") + } /** Checks whether a param is explicitly set. */ final def isSet(param: Param[_]): Boolean = { @@ -392,7 +438,6 @@ trait Params extends Identifiable with Serializable { * @param value the default value */ protected final def setDefault[T](param: Param[T], value: T): this.type = { - shouldOwn(param) defaultParamMap.put(param, value) this } @@ -430,13 +475,13 @@ trait Params extends Identifiable with Serializable { } /** - * Creates a copy of this instance with a randomly generated uid and some extra params. - * The default implementation calls the default constructor to create a new instance, then - * copies the embedded and extra parameters over and returns the new instance. + * Creates a copy of this instance with the same UID and some extra params. + * The default implementation tries to create a new instance with the same UID. + * Then it copies the embedded and extra parameters over and returns the new instance. * Subclasses should override this method if the default approach is not sufficient. */ def copy(extra: ParamMap): Params = { - val that = this.getClass.newInstance() + val that = this.getClass.getConstructor(classOf[String]).newInstance(uid) copyValues(that, extra) that } @@ -465,7 +510,7 @@ trait Params extends Identifiable with Serializable { /** Validates that the input param belongs to this instance. 
*/ private def shouldOwn(param: Param[_]): Unit = { - require(param.parent.eq(this), s"Param $param does not belong to $this.") + require(param.parent == uid && hasParam(param.name), s"Param $param does not belong to $this.") } /** @@ -581,7 +626,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) override def toString: String = { map.toSeq.sortBy(_._1.name).map { case (param, value) => - s"\t${param.parent.uid}-${param.name}: $value" + s"\t${param.parent}-${param.name}: $value" }.mkString("{\n", ",\n", "\n}") } diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index d7cbffc3be26f..45c57b50da70f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -35,6 +35,7 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.optimization.NNLS import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame @@ -171,7 +172,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR * Model fitted by ALS. */ class ALSModel private[ml] ( - override val parent: ALS, + override val uid: String, k: Int, userFactors: RDD[(Int, Array[Float])], itemFactors: RDD[(Int, Array[Float])]) @@ -235,10 +236,12 @@ class ALSModel private[ml] ( * indicated user * preferences rather than explicit ratings given to items. */ -class ALS extends Estimator[ALSModel] with ALSParams { +class ALS(override val uid: String) extends Estimator[ALSModel] with ALSParams { import org.apache.spark.ml.recommendation.ALS.Rating + def this() = this(Identifiable.randomUID("als")) + /** @group setParam */ def setRank(value: Int): this.type = set(rank, value) @@ -303,7 +306,8 @@ class ALS extends Estimator[ALSModel] with ALSParams { maxIter = $(maxIter), regParam = $(regParam), implicitPrefs = $(implicitPrefs), alpha = $(alpha), nonnegative = $(nonnegative), checkpointInterval = $(checkpointInterval), seed = $(seed)) - copyValues(new ALSModel(this, $(rank), userFactors, itemFactors)) + val model = new ALSModel(uid, $(rank), userFactors, itemFactors).setParent(this) + copyValues(model) } override def transformSchema(schema: StructType): StructType = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index f8f0b161a4812..e67df21b2e4ae 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -21,7 +21,7 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tree.{TreeRegressorParams, DecisionTreeParams, DecisionTreeModel, Node} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{DecisionTree => OldDecisionTree} @@ -38,10 +38,12 @@ import org.apache.spark.sql.DataFrame * It supports both continuous and categorical features. 
*/ @AlphaComponent -final class DecisionTreeRegressor +final class DecisionTreeRegressor(override val uid: String) extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel] with DecisionTreeParams with TreeRegressorParams { + def this() = this(Identifiable.randomUID("dtr")) + // Override parameter setters from parent trait for Java API compatibility. override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value) @@ -91,7 +93,7 @@ object DecisionTreeRegressor { */ @AlphaComponent final class DecisionTreeRegressionModel private[ml] ( - override val parent: DecisionTreeRegressor, + override val uid: String, override val rootNode: Node) extends PredictionModel[Vector, DecisionTreeRegressionModel] with DecisionTreeModel with Serializable { @@ -104,7 +106,7 @@ final class DecisionTreeRegressionModel private[ml] ( } override def copy(extra: ParamMap): DecisionTreeRegressionModel = { - copyValues(new DecisionTreeRegressionModel(parent, rootNode), extra) + copyValues(new DecisionTreeRegressionModel(uid, rootNode), extra) } override def toString: String = { @@ -128,6 +130,7 @@ private[ml] object DecisionTreeRegressionModel { s"Cannot convert non-regression DecisionTreeModel (old API) to" + s" DecisionTreeRegressionModel (new API). Algo is: ${oldModel.algo}") val rootNode = Node.fromOld(oldModel.topNode, categoricalFeatures) - new DecisionTreeRegressionModel(parent, rootNode) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("dtr") + new DecisionTreeRegressionModel(uid, rootNode) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 461905c12701a..4249ff5c1ebc7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -24,7 +24,7 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.{Param, ParamMap} import org.apache.spark.ml.tree.{GBTParams, TreeRegressorParams, DecisionTreeModel, TreeEnsembleModel} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{GradientBoostedTrees => OldGBT} @@ -42,10 +42,12 @@ import org.apache.spark.sql.DataFrame * It supports both continuous and categorical features. */ @AlphaComponent -final class GBTRegressor +final class GBTRegressor(override val uid: String) extends Predictor[Vector, GBTRegressor, GBTRegressionModel] with GBTParams with TreeRegressorParams with Logging { + def this() = this(Identifiable.randomUID("gbtr")) + // Override parameter setters from parent trait for Java API compatibility. 
// Parameters from TreeRegressorParams: @@ -149,7 +151,7 @@ object GBTRegressor { */ @AlphaComponent final class GBTRegressionModel( - override val parent: GBTRegressor, + override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], private val _treeWeights: Array[Double]) extends PredictionModel[Vector, GBTRegressionModel] @@ -173,7 +175,7 @@ final class GBTRegressionModel( } override def copy(extra: ParamMap): GBTRegressionModel = { - copyValues(new GBTRegressionModel(parent, _trees, _treeWeights), extra) + copyValues(new GBTRegressionModel(uid, _trees, _treeWeights), extra) } override def toString: String = { @@ -199,6 +201,7 @@ private[ml] object GBTRegressionModel { // parent, fittingParamMap for each tree is null since there are no good ways to set these. DecisionTreeRegressionModel.fromOld(tree, null, categoricalFeatures) } - new GBTRegressionModel(parent, newTrees, oldModel.treeWeights) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("gbtr") + new GBTRegressionModel(parent.uid, newTrees, oldModel.treeWeights) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 36c242bb5f2a7..3ebb78f79201a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -20,14 +20,14 @@ package org.apache.spark.ml.regression import scala.collection.mutable import breeze.linalg.{DenseVector => BDV, norm => brzNorm} -import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, - OWLQN => BreezeOWLQN} +import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} import org.apache.spark.Logging import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared.{HasElasticNetParam, HasMaxIter, HasRegParam, HasTol} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.linalg.BLAS._ import org.apache.spark.mllib.regression.LabeledPoint @@ -59,9 +59,12 @@ private[regression] trait LinearRegressionParams extends PredictorParams * - L2 + L1 (elastic net) */ @AlphaComponent -class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegressionModel] +class LinearRegression(override val uid: String) + extends Regressor[Vector, LinearRegression, LinearRegressionModel] with LinearRegressionParams with Logging { + def this() = this(Identifiable.randomUID("linReg")) + /** * Set the regularization parameter. * Default is 0.0. @@ -128,7 +131,7 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress logWarning(s"The standard deviation of the label is zero, so the weights will be zeros " + s"and the intercept will be the mean of the label; as a result, training is not needed.") if (handlePersistence) instances.unpersist() - return new LinearRegressionModel(this, Vectors.sparse(numFeatures, Seq()), yMean) + return new LinearRegressionModel(uid, Vectors.sparse(numFeatures, Seq()), yMean) } val featuresMean = summarizer.mean.toArray @@ -182,7 +185,7 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress if (handlePersistence) instances.unpersist() // TODO: Converts to sparse format based on the storage, but may base on the scoring speed. 
- new LinearRegressionModel(this, weights.compressed, intercept) + copyValues(new LinearRegressionModel(uid, weights.compressed, intercept)) } } @@ -193,7 +196,7 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress */ @AlphaComponent class LinearRegressionModel private[ml] ( - override val parent: LinearRegression, + override val uid: String, val weights: Vector, val intercept: Double) extends RegressionModel[Vector, LinearRegressionModel] @@ -204,7 +207,7 @@ class LinearRegressionModel private[ml] ( } override def copy(extra: ParamMap): LinearRegressionModel = { - copyValues(new LinearRegressionModel(parent, weights, intercept), extra) + copyValues(new LinearRegressionModel(uid, weights, intercept), extra) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index dbc628927433d..82437aa8de294 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -21,7 +21,7 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.{PredictionModel, Predictor} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tree.{RandomForestParams, TreeRegressorParams, DecisionTreeModel, TreeEnsembleModel} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{RandomForest => OldRandomForest} @@ -37,10 +37,12 @@ import org.apache.spark.sql.DataFrame * It supports both continuous and categorical features. */ @AlphaComponent -final class RandomForestRegressor +final class RandomForestRegressor(override val uid: String) extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel] with RandomForestParams with TreeRegressorParams { + def this() = this(Identifiable.randomUID("rfr")) + // Override parameter setters from parent trait for Java API compatibility. // Parameters from TreeRegressorParams: @@ -105,7 +107,7 @@ object RandomForestRegressor { */ @AlphaComponent final class RandomForestRegressionModel private[ml] ( - override val parent: RandomForestRegressor, + override val uid: String, private val _trees: Array[DecisionTreeRegressionModel]) extends PredictionModel[Vector, RandomForestRegressionModel] with TreeEnsembleModel with Serializable { @@ -128,7 +130,7 @@ final class RandomForestRegressionModel private[ml] ( } override def copy(extra: ParamMap): RandomForestRegressionModel = { - copyValues(new RandomForestRegressionModel(parent, _trees), extra) + copyValues(new RandomForestRegressionModel(uid, _trees), extra) } override def toString: String = { @@ -154,6 +156,6 @@ private[ml] object RandomForestRegressionModel { // parent, fittingParamMap for each tree is null since there are no good ways to set these. 
DecisionTreeRegressionModel.fromOld(tree, null, categoricalFeatures) } - new RandomForestRegressionModel(parent, newTrees) + new RandomForestRegressionModel(parent.uid, newTrees) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index ac0d1fed84b2e..5c6ff2dda3604 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -23,6 +23,7 @@ import org.apache.spark.Logging import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml._ import org.apache.spark.ml.param._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.util.MLUtils import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.StructType @@ -81,7 +82,10 @@ private[ml] trait CrossValidatorParams extends Params { * K-fold cross validation. */ @AlphaComponent -class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorParams with Logging { +class CrossValidator(override val uid: String) extends Estimator[CrossValidatorModel] + with CrossValidatorParams with Logging { + + def this() = this(Identifiable.randomUID("cv")) private val f2jBLAS = new F2jBLAS @@ -136,7 +140,7 @@ class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorP logInfo(s"Best set of parameters:\n${epm(bestIndex)}") logInfo(s"Best cross-validation metric: $bestMetric.") val bestModel = est.fit(dataset, epm(bestIndex)).asInstanceOf[Model[_]] - copyValues(new CrossValidatorModel(this, bestModel)) + copyValues(new CrossValidatorModel(uid, bestModel).setParent(this)) } override def transformSchema(schema: StructType): StructType = { @@ -150,7 +154,7 @@ class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorP */ @AlphaComponent class CrossValidatorModel private[ml] ( - override val parent: CrossValidator, + override val uid: String, val bestModel: Model[_]) extends Model[CrossValidatorModel] with CrossValidatorParams { diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala index 8a56748ab0a02..146697680092c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala @@ -19,15 +19,24 @@ package org.apache.spark.ml.util import java.util.UUID + /** - * Object with a unique id. + * Trait for an object with an immutable unique ID that identifies itself and its derivatives. */ -private[ml] trait Identifiable extends Serializable { +trait Identifiable { + + /** + * An immutable unique ID for the object and its derivatives. + */ + val uid: String +} + +object Identifiable { /** - * A unique id for the object. The default implementation concatenates the class name, "_", and 8 - * random hex chars. + * Returns a random UID that concatenates the given prefix, "_", and 12 random hex chars. 
*/ - private[ml] val uid: String = - this.getClass.getSimpleName + "_" + UUID.randomUUID().toString.take(8) + def randomUID(prefix: String): String = { + prefix + "_" + UUID.randomUUID().toString.takeRight(12) + } } diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java index 7e7189a2b1d53..f75e024a713ee 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java @@ -84,7 +84,7 @@ public void logisticRegressionWithSetters() { .setThreshold(0.6) .setProbabilityCol("myProbability"); LogisticRegressionModel model = lr.fit(dataset); - LogisticRegression parent = model.parent(); + LogisticRegression parent = (LogisticRegression) model.parent(); assert(parent.getMaxIter() == 10); assert(parent.getRegParam() == 1.0); assert(parent.getThreshold() == 0.6); @@ -110,7 +110,7 @@ public void logisticRegressionWithSetters() { // Call fit() with new params, and check as many params as we can. LogisticRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.threshold().w(0.4), lr.probabilityCol().w("theProb")); - LogisticRegression parent2 = model2.parent(); + LogisticRegression parent2 = (LogisticRegression) model2.parent(); assert(parent2.getMaxIter() == 5); assert(parent2.getRegParam() == 0.1); assert(parent2.getThreshold() == 0.4); diff --git a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java index 8abe575610d19..3a41890b92d63 100644 --- a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java +++ b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java @@ -21,43 +21,65 @@ import com.google.common.collect.Lists; +import org.apache.spark.ml.util.Identifiable$; + /** * A subclass of Params for testing. 
*/ public class JavaTestParams extends JavaParams { - public IntParam myIntParam; + public JavaTestParams() { + this.uid_ = Identifiable$.MODULE$.randomUID("javaTestParams"); + init(); + } + + public JavaTestParams(String uid) { + this.uid_ = uid; + init(); + } + + private String uid_; + + @Override + public String uid() { + return uid_; + } - public int getMyIntParam() { return (Integer)getOrDefault(myIntParam); } + private IntParam myIntParam_; + public IntParam myIntParam() { return myIntParam_; } + + public int getMyIntParam() { return (Integer)getOrDefault(myIntParam_); } public JavaTestParams setMyIntParam(int value) { - set(myIntParam, value); return this; + set(myIntParam_, value); return this; } - public DoubleParam myDoubleParam; + private DoubleParam myDoubleParam_; + public DoubleParam myDoubleParam() { return myDoubleParam_; } - public double getMyDoubleParam() { return (Double)getOrDefault(myDoubleParam); } + public double getMyDoubleParam() { return (Double)getOrDefault(myDoubleParam_); } public JavaTestParams setMyDoubleParam(double value) { - set(myDoubleParam, value); return this; + set(myDoubleParam_, value); return this; } - public Param<String> myStringParam; + private Param<String> myStringParam_; + public Param<String> myStringParam() { return myStringParam_; } - public String getMyStringParam() { return (String)getOrDefault(myStringParam); } + public String getMyStringParam() { return getOrDefault(myStringParam_); } public JavaTestParams setMyStringParam(String value) { - set(myStringParam, value); return this; + set(myStringParam_, value); return this; } - public JavaTestParams() { - myIntParam = new IntParam(this, "myIntParam", "this is an int param", ParamValidators.gt(0)); - myDoubleParam = new DoubleParam(this, "myDoubleParam", "this is a double param", + private void init() { + myIntParam_ = new IntParam(this, "myIntParam", "this is an int param", ParamValidators.gt(0)); + myDoubleParam_ = new DoubleParam(this, "myDoubleParam", "this is a double param", ParamValidators.inRange(0.0, 1.0)); List<String> validStrings = Lists.newArrayList("a", "b"); - myStringParam = new Param<String>(this, "myStringParam", "this is a string param", + myStringParam_ = new Param<String>(this, "myStringParam", "this is a string param", ParamValidators.inArray(validStrings)); - setDefault(myIntParam, 1); - setDefault(myDoubleParam, 0.5); + setDefault(myIntParam_, 1); + setDefault(myDoubleParam_, 0.5); } } diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java index a82b86d560b6e..d591a456864e4 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java @@ -77,14 +77,14 @@ public void linearRegressionWithSetters() { .setMaxIter(10) .setRegParam(1.0); LinearRegressionModel model = lr.fit(dataset); - LinearRegression parent = model.parent(); + LinearRegression parent = (LinearRegression) model.parent(); assertEquals(10, parent.getMaxIter()); assertEquals(1.0, parent.getRegParam(), 0.0); // Call fit() with new params, and check as many params as we can. 
LinearRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.predictionCol().w("thePred")); - LinearRegression parent2 = model2.parent(); + LinearRegression parent2 = (LinearRegression) model2.parent(); assertEquals(5, parent2.getMaxIter()); assertEquals(0.1, parent2.getRegParam(), 0.0); assertEquals("thePred", model2.getPredictionCol()); diff --git a/mllib/src/test/java/org/apache/spark/ml/util/IdentifiableSuite.scala b/mllib/src/test/java/org/apache/spark/ml/util/IdentifiableSuite.scala new file mode 100644 index 0000000000000..67c262d0f9d8d --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/util/IdentifiableSuite.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.util + +import org.scalatest.FunSuite + +class IdentifiableSuite extends FunSuite { + + import IdentifiableSuite.Test + + test("Identifiable") { + val test0 = new Test("test_0") + assert(test0.uid === "test_0") + + val test1 = new Test + assert(test1.uid.startsWith("test_")) + } +} + +object IdentifiableSuite { + + class Test(override val uid: String) extends Identifiable { + def this() = this(Identifiable.randomUID("test")) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala index 03af4ecd7a7e0..3fdc66be8a314 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala @@ -268,7 +268,7 @@ private[ml] object DecisionTreeClassifierSuite extends FunSuite { val newTree = dt.fit(newData) // Use parent, fittingParamMap from newTree since these are not checked anyways. val oldTreeAsNew = DecisionTreeClassificationModel.fromOld( - oldTree, newTree.parent, categoricalFeatures) + oldTree, newTree.parent.asInstanceOf[DecisionTreeClassifier], categoricalFeatures) TreeTests.checkEqual(oldTreeAsNew, newTree) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index 16c758b82c7cd..ea86867f1161a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -130,7 +130,7 @@ private object GBTClassifierSuite { val newModel = gbt.fit(newData) // Use parent, fittingParamMap from newTree since these are not checked anyways. 
val oldModelAsNew = GBTClassificationModel.fromOld( - oldModel, newModel.parent, categoricalFeatures) + oldModel, newModel.parent.asInstanceOf[GBTClassifier], categoricalFeatures) TreeTests.checkEqual(oldModelAsNew, newModel) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 4df8016009171..43765241a20b6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -19,13 +19,12 @@ package org.apache.spark.ml.classification import org.scalatest.FunSuite -import org.apache.spark.mllib.classification.LogisticRegressionSuite +import org.apache.spark.mllib.classification.LogisticRegressionSuite._ import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.sql.{DataFrame, Row, SQLContext} - class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { @transient var sqlContext: SQLContext = _ @@ -37,8 +36,7 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { super.beforeAll() sqlContext = new SQLContext(sc) - dataset = sqlContext.createDataFrame(sc.parallelize(LogisticRegressionSuite - .generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42), 4)) + dataset = sqlContext.createDataFrame(generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42)) /** * Here is the instruction describing how to export the test data into CSV format @@ -60,31 +58,30 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { val xMean = Array(5.843, 3.057, 3.758, 1.199) val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) - val testData = LogisticRegressionSuite.generateMultinomialLogisticInput( - weights, xMean, xVariance, true, nPoints, 42) + val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42) - sqlContext.createDataFrame(sc.parallelize(LogisticRegressionSuite - .generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42), 4)) + sqlContext.createDataFrame( + generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)) } } test("logistic regression: default params") { val lr = new LogisticRegression - assert(lr.getLabelCol == "label") - assert(lr.getFeaturesCol == "features") - assert(lr.getPredictionCol == "prediction") - assert(lr.getRawPredictionCol == "rawPrediction") - assert(lr.getProbabilityCol == "probability") - assert(lr.getFitIntercept == true) + assert(lr.getLabelCol === "label") + assert(lr.getFeaturesCol === "features") + assert(lr.getPredictionCol === "prediction") + assert(lr.getRawPredictionCol === "rawPrediction") + assert(lr.getProbabilityCol === "probability") + assert(lr.getFitIntercept) val model = lr.fit(dataset) model.transform(dataset) .select("label", "probability", "prediction", "rawPrediction") .collect() assert(model.getThreshold === 0.5) - assert(model.getFeaturesCol == "features") - assert(model.getPredictionCol == "prediction") - assert(model.getRawPredictionCol == "rawPrediction") - assert(model.getProbabilityCol == "probability") + assert(model.getFeaturesCol === "features") + assert(model.getPredictionCol === "prediction") + assert(model.getRawPredictionCol === "rawPrediction") + assert(model.getProbabilityCol === "probability") assert(model.intercept !== 
0.0) } @@ -103,7 +100,7 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { .setThreshold(0.6) .setProbabilityCol("myProbability") val model = lr.fit(dataset) - val parent = model.parent + val parent = model.parent.asInstanceOf[LogisticRegression] assert(parent.getMaxIter === 10) assert(parent.getRegParam === 1.0) assert(parent.getThreshold === 0.6) @@ -129,12 +126,12 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { // Call fit() with new params, and check as many params as we can. val model2 = lr.fit(dataset, lr.maxIter -> 5, lr.regParam -> 0.1, lr.threshold -> 0.4, lr.probabilityCol -> "theProb") - val parent2 = model2.parent + val parent2 = model2.parent.asInstanceOf[LogisticRegression] assert(parent2.getMaxIter === 5) assert(parent2.getRegParam === 0.1) assert(parent2.getThreshold === 0.4) assert(model2.getThreshold === 0.4) - assert(model2.getProbabilityCol == "theProb") + assert(model2.getProbabilityCol === "theProb") } test("logistic regression: Predictor, Classifier methods") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala index e65ffae918ca9..990cfb08af83b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala @@ -57,7 +57,7 @@ class OneVsRestSuite extends FunSuite with MLlibTestSparkContext { test("one-vs-rest: default params") { val numClasses = 3 val ova = new OneVsRest() - ova.setClassifier(new LogisticRegression) + .setClassifier(new LogisticRegression) assert(ova.getLabelCol === "label") assert(ova.getPredictionCol === "prediction") val ovaModel = ova.fit(dataset) @@ -97,7 +97,9 @@ class OneVsRestSuite extends FunSuite with MLlibTestSparkContext { } } -private class MockLogisticRegression extends LogisticRegression { +private class MockLogisticRegression(uid: String) extends LogisticRegression(uid) { + + def this() = this("mockLogReg") setMaxIter(1) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala index c41def9330504..08f86fa45bc1d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala @@ -160,7 +160,7 @@ private object RandomForestClassifierSuite { val newModel = rf.fit(newData) // Use parent, fittingParamMap from newTree since these are not checked anyways. 
val oldModelAsNew = RandomForestClassificationModel.fromOld( - oldModel, newModel.parent, categoricalFeatures) + oldModel, newModel.parent.asInstanceOf[RandomForestClassifier], categoricalFeatures) TreeTests.checkEqual(oldModelAsNew, newModel) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala index 6056e7d3f6ff8..b96874f3a8821 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala @@ -23,21 +23,22 @@ class ParamsSuite extends FunSuite { test("param") { val solver = new TestParams() + val uid = solver.uid import solver.{maxIter, inputCol} assert(maxIter.name === "maxIter") assert(maxIter.doc === "max number of iterations (>= 0)") - assert(maxIter.parent.eq(solver)) - assert(maxIter.toString === "maxIter: max number of iterations (>= 0) (default: 10)") + assert(maxIter.parent === uid) + assert(maxIter.toString === s"${uid}__maxIter") assert(!maxIter.isValid(-1)) assert(maxIter.isValid(0)) assert(maxIter.isValid(1)) solver.setMaxIter(5) - assert(maxIter.toString === + assert(solver.explainParam(maxIter) === "maxIter: max number of iterations (>= 0) (default: 10, current: 5)") - assert(inputCol.toString === "inputCol: input column name (undefined)") + assert(inputCol.toString === s"${uid}__inputCol") intercept[IllegalArgumentException] { solver.setMaxIter(-1) @@ -118,7 +119,10 @@ class ParamsSuite extends FunSuite { assert(!solver.isDefined(inputCol)) intercept[NoSuchElementException](solver.getInputCol) - assert(solver.explainParams() === Seq(inputCol, maxIter).mkString("\n")) + assert(solver.explainParam(maxIter) === + "maxIter: max number of iterations (>= 0) (default: 10, current: 100)") + assert(solver.explainParams() === + Seq(inputCol, maxIter).map(solver.explainParam).mkString("\n")) assert(solver.getParam("inputCol").eq(inputCol)) assert(solver.getParam("maxIter").eq(maxIter)) @@ -148,7 +152,7 @@ class ParamsSuite extends FunSuite { assert(!solver.isSet(maxIter)) val copied = solver.copy(ParamMap(solver.maxIter -> 50)) - assert(copied.uid !== solver.uid) + assert(copied.uid === solver.uid) assert(copied.getInputCol === solver.getInputCol) assert(copied.getMaxIter === 50) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala b/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala index dc16073640407..a9e78366ad98f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala @@ -18,9 +18,12 @@ package org.apache.spark.ml.param import org.apache.spark.ml.param.shared.{HasInputCol, HasMaxIter} +import org.apache.spark.ml.util.Identifiable /** A subclass of Params for testing. 
*/ -class TestParams extends Params with HasMaxIter with HasInputCol { +class TestParams(override val uid: String) extends Params with HasMaxIter with HasInputCol { + + def this() = this(Identifiable.randomUID("testParams")) def setMaxIter(value: Int): this.type = { set(maxIter, value); this } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala index 5aa81b44ddaf9..1196a772dfdd4 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala @@ -85,7 +85,7 @@ private[ml] object DecisionTreeRegressorSuite extends FunSuite { val newTree = dt.fit(newData) // Use parent, fittingParamMap from newTree since these are not checked anyways. val oldTreeAsNew = DecisionTreeRegressionModel.fromOld( - oldTree, newTree.parent, categoricalFeatures) + oldTree, newTree.parent.asInstanceOf[DecisionTreeRegressor], categoricalFeatures) TreeTests.checkEqual(oldTreeAsNew, newTree) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index 25b36ab08b67c..40e7e3273e965 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -130,7 +130,8 @@ private object GBTRegressorSuite { val newData: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, numClasses = 0) val newModel = gbt.fit(newData) // Use parent, fittingParamMap from newTree since these are not checked anyways. - val oldModelAsNew = GBTRegressionModel.fromOld(oldModel, newModel.parent, categoricalFeatures) + val oldModelAsNew = GBTRegressionModel.fromOld( + oldModel, newModel.parent.asInstanceOf[GBTRegressor], categoricalFeatures) TreeTests.checkEqual(oldModelAsNew, newModel) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala index 45f09f4fdab81..3efffbb763b78 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala @@ -116,7 +116,7 @@ private object RandomForestRegressorSuite extends FunSuite { val newModel = rf.fit(newData) // Use parent, fittingParamMap from newTree since these are not checked anyways. val oldModelAsNew = RandomForestRegressionModel.fromOld( - oldModel, newModel.parent, categoricalFeatures) + oldModel, newModel.parent.asInstanceOf[RandomForestRegressor], categoricalFeatures) TreeTests.checkEqual(oldModelAsNew, newModel) } } From c1080b6fddb22d84694da2453e46a03fbc041576 Mon Sep 17 00:00:00 2001 From: DB Tsai <dbt@netflix.com> Date: Thu, 14 May 2015 01:26:08 -0700 Subject: [PATCH 168/320] [SPARK-7568] [ML] ml.LogisticRegression doesn't output the right prediction The difference is because we previously don't fit the intercept in Spark 1.3. Here, we change the input `String` so that the probability of instance 6 can be classified as `1.0` without any ambiguity. 
with lambda = 0.001 in current LOR implementation, the prediction is ``` (4, spark i j k) --> prob=[0.1596407738787411,0.8403592261212589], prediction=1.0 (5, l m n) --> prob=[0.8378325685476612,0.16216743145233883], prediction=0.0 (6, spark hadoop spark) --> prob=[0.0692663313297627,0.9307336686702373], prediction=1.0 (7, apache hadoop) --> prob=[0.9821575333444208,0.01784246665557917], prediction=0.0 ``` and the training accuracy is ``` (0, a b c d e spark) --> prob=[0.0021342419881406746,0.9978657580118594], prediction=1.0 (1, b d) --> prob=[0.9959176174854043,0.004082382514595685], prediction=0.0 (2, spark f g h) --> prob=[0.0014541569986711233,0.9985458430013289], prediction=1.0 (3, hadoop mapreduce) --> prob=[0.9982978367343561,0.0017021632656438518], prediction=0.0 ``` Author: DB Tsai <dbt@netflix.com> Closes #6109 from dbtsai/lor-example and squashes the following commits: ac63ce4 [DB Tsai] first commit --- .../examples/ml/JavaSimpleTextClassificationPipeline.java | 4 ++-- .../src/main/python/ml/simple_text_classification_pipeline.py | 4 ++-- .../spark/examples/ml/SimpleTextClassificationPipeline.scala | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java index ef1ec103a879f..54738813d0016 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java @@ -66,7 +66,7 @@ public static void main(String[] args) { .setOutputCol("features"); LogisticRegression lr = new LogisticRegression() .setMaxIter(10) - .setRegParam(0.01); + .setRegParam(0.001); Pipeline pipeline = new Pipeline() .setStages(new PipelineStage[] {tokenizer, hashingTF, lr}); @@ -77,7 +77,7 @@ public static void main(String[] args) { List<Document> localTest = Lists.newArrayList( new Document(4L, "spark i j k"), new Document(5L, "l m n"), - new Document(6L, "mapreduce spark"), + new Document(6L, "spark hadoop spark"), new Document(7L, "apache hadoop")); DataFrame test = jsql.createDataFrame(jsc.parallelize(localTest), Document.class); diff --git a/examples/src/main/python/ml/simple_text_classification_pipeline.py b/examples/src/main/python/ml/simple_text_classification_pipeline.py index fab21f003b233..b4f06bf888746 100644 --- a/examples/src/main/python/ml/simple_text_classification_pipeline.py +++ b/examples/src/main/python/ml/simple_text_classification_pipeline.py @@ -48,7 +48,7 @@ # Configure an ML pipeline, which consists of tree stages: tokenizer, hashingTF, and lr. tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features") - lr = LogisticRegression(maxIter=10, regParam=0.01) + lr = LogisticRegression(maxIter=10, regParam=0.001) pipeline = Pipeline(stages=[tokenizer, hashingTF, lr]) # Fit the pipeline to training documents. 
@@ -58,7 +58,7 @@ Document = Row("id", "text") test = sc.parallelize([(4, "spark i j k"), (5, "l m n"), - (6, "mapreduce spark"), + (6, "spark hadoop spark"), (7, "apache hadoop")]) \ .map(lambda x: Document(*x)).toDF() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala index 6772efd2c581c..1324b066c30c3 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala @@ -64,7 +64,7 @@ object SimpleTextClassificationPipeline { .setOutputCol("features") val lr = new LogisticRegression() .setMaxIter(10) - .setRegParam(0.01) + .setRegParam(0.001) val pipeline = new Pipeline() .setStages(Array(tokenizer, hashingTF, lr)) @@ -75,7 +75,7 @@ object SimpleTextClassificationPipeline { val test = sc.parallelize(Seq( Document(4L, "spark i j k"), Document(5L, "l m n"), - Document(6L, "mapreduce spark"), + Document(6L, "spark hadoop spark"), Document(7L, "apache hadoop"))) // Make predictions on test documents. From 7fb715de6d90c3eb756935440f75b1de674f8ece Mon Sep 17 00:00:00 2001 From: FavioVazquez <favio.vazquezp@gmail.com> Date: Thu, 14 May 2015 15:22:58 +0100 Subject: [PATCH 169/320] [SPARK-7249] Updated Hadoop dependencies due to inconsistency in the versions Updated Hadoop dependencies due to inconsistency in the versions. Now the global properties are the ones used by the hadoop-2.2 profile, and the profile was set to empty but kept for backwards compatibility reasons. Changes proposed by vanzin resulting from previous pull-request https://github.com/apache/spark/pull/5783 that did not fixed the problem correctly. Please let me know if this is the correct way of doing this, the comments of vanzin are in the pull-request mentioned. 
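For quick reference, a sketch of the build invocations implied by this change (the commands are taken from the building-spark.md edits further down in this patch; treat this as an illustration of the new defaults, not an exhaustive list):

```bash
# Hadoop 2.2.x is now the default; -Phadoop-2.2 becomes a no-op kept for compatibility.
mvn -Pyarn -Phadoop-2.2 -DskipTests clean package

# Hadoop 1.x and CDH4 "mr1" builds now need the explicit hadoop-1 profile.
mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package

# Later 2.x lines keep their dedicated profiles.
mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package
```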
Author: FavioVazquez <favio.vazquezp@gmail.com> Closes #5786 from FavioVazquez/update-hadoop-dependencies and squashes the following commits: 11670e5 [FavioVazquez] - Added missing instance of -Phadoop-2.2 in create-release.sh 379f50d [FavioVazquez] - Added instances of -Phadoop-2.2 in create-release.sh, run-tests, scalastyle and building-spark.md - Reconstructed docs to not ask users to rely on default behavior 3f9249d [FavioVazquez] Merge branch 'master' of https://github.com/apache/spark into update-hadoop-dependencies 31bdafa [FavioVazquez] - Added missing instances in -Phadoop-1 in create-release.sh, run-tests and in the building-spark documentation cbb93e8 [FavioVazquez] - Added comment related to SPARK-3710 about hadoop-yarn-server-tests in Hadoop 2.2 that fails to pull some needed dependencies 83dc332 [FavioVazquez] - Cleaned up the main POM concerning the yarn profile - Erased hadoop-2.2 profile from yarn/pom.xml and its content was integrated into yarn/pom.xml 93f7624 [FavioVazquez] - Deleted unnecessary comments and <activation> tag on the YARN profile in the main POM 668d126 [FavioVazquez] - Moved <dependencies> <activation> and <properties> sections of the hadoop-2.2 profile in the YARN POM to the YARN profile in the root POM - Erased unnecessary hadoop-2.2 profile from the YARN POM fda6a51 [FavioVazquez] - Updated hadoop1 releases in create-release.sh due to changes in the default hadoop version set - Erased unnecessary instance of -Dyarn.version=2.2.0 in create-release.sh - Prettify comment in yarn/pom.xml 0470587 [FavioVazquez] - Erased unnecessary instance of -Phadoop-2.2 -Dhadoop.version=2.2.0 in create-release.sh - Updated how the releases are made in the create-release.sh no that the default hadoop version is the 2.2.0 - Erased unnecessary instance of -Phadoop-2.2 -Dhadoop.version=2.2.0 in scalastyle - Erased unnecessary instance of -Phadoop-2.2 -Dhadoop.version=2.2.0 in run-tests - Better example given in the hadoop-third-party-distributions.md now that the default hadoop version is 2.2.0 a650779 [FavioVazquez] - Default value of avro.mapred.classifier has been set to hadoop2 in pom.xml - Cleaned up hadoop-2.3 and 2.4 profiles due to change in the default set in avro.mapred.classifier in pom.xml 199f40b [FavioVazquez] - Erased unnecessary CDH5-specific note in docs/building-spark.md - Remove example of instance -Phadoop-2.2 -Dhadoop.version=2.2.0 in docs/building-spark.md - Enabled hadoop-2.2 profile when the Hadoop version is 2.2.0, which is now the default .Added comment in the yarn/pom.xml to specify that. 88a8b88 [FavioVazquez] - Simplified Hadoop profiles due to new setting of global properties in the pom.xml file - Added comment to specify that the hadoop-2.2 profile is now the default hadoop profile in the pom.xml file - Erased hadoop-2.2 from related hadoop profiles now that is a no-op in the make-distribution.sh file 70b8344 [FavioVazquez] - Fixed typo in the make-distribution.sh file and added hadoop-1 in the Related profiles 287fa2f [FavioVazquez] - Updated documentation about specifying the hadoop version in building-spark. Now is clear that Spark will build against Hadoop 2.2.0 by default. - Added Cloudera CDH 5.3.3 without MapReduce example in the building-spark doc. 1354292 [FavioVazquez] - Fixed hadoop-1 version to match jenkins build profile in hadoop1.0 tests and documentation 6b4bfaf [FavioVazquez] - Cleanup in hadoop-2.x profiles since they contained mostly redundant stuff. 
7e9955d [FavioVazquez] - Updated Hadoop dependencies due to inconsistency in the versions. Now the global properties are the ones used by the hadoop-2.2 profile, and the profile was set to empty but kept for backwards compatibility reasons 660decc [FavioVazquez] - Updated Hadoop dependencies due to inconsistency in the versions. Now the global properties are the ones used by the hadoop-2.2 profile, and the profile was set to empty but kept for backwards compatibility reasons ec91ce3 [FavioVazquez] - Updated protobuf-java version of com.google.protobuf dependancy to fix blocking error when connecting to HDFS via the Hadoop Cloudera HDFS CDH5 (fix for 2.5.0-cdh5.3.3 version) --- dev/create-release/create-release.sh | 14 ++-- dev/run-tests | 6 +- dev/scalastyle | 4 +- docs/building-spark.md | 11 +-- docs/hadoop-third-party-distributions.md | 2 +- make-distribution.sh | 2 +- pom.xml | 33 ++++---- yarn/pom.xml | 97 +++++++++++------------- 8 files changed, 79 insertions(+), 90 deletions(-) diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index 3dbb35f7054a2..af4f00054997c 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -118,14 +118,14 @@ if [[ ! "$@" =~ --skip-publish ]]; then rm -rf $SPARK_REPO - build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \ - -Pyarn -Phive -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ + build/mvn -DskipTests -Pyarn -Phive \ + -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install ./dev/change-version-to-2.11.sh - build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \ - -Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ + build/mvn -DskipTests -Pyarn -Phive \ + -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install ./dev/change-version-to-2.10.sh @@ -228,9 +228,9 @@ if [[ ! "$@" =~ --skip-package ]]; then # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds # share the same Zinc server. 
- make_binary_release "hadoop1" "-Phive -Phive-thriftserver -Dhadoop.version=1.0.4" "3030" & - make_binary_release "hadoop1-scala2.11" "-Phive -Dscala-2.11" "3031" & - make_binary_release "cdh4" "-Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" & + make_binary_release "hadoop1" "-Phadoop-1 -Phive -Phive-thriftserver" "3030" & + make_binary_release "hadoop1-scala2.11" "-Phadoop-1 -Phive -Dscala-2.11" "3031" & + make_binary_release "cdh4" "-Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" & make_binary_release "hadoop2.3" "-Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" & make_binary_release "hadoop2.4" "-Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" & make_binary_release "mapr3" "-Pmapr3 -Phive -Phive-thriftserver" "3035" & diff --git a/dev/run-tests b/dev/run-tests index ef587a1a5988c..44d802782c4a4 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -40,11 +40,11 @@ function handle_error () { { if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then - export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4" + export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=1.0.4" elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then - export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1" + export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=2.0.0-mr1-cdh4.1.1" elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then - export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0" + export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2" elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0" fi diff --git a/dev/scalastyle b/dev/scalastyle index 4e03f89ed5d5d..7f014c82f14c6 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -20,8 +20,8 @@ echo -e "q\n" | build/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt echo -e "q\n" | build/sbt -Phive -Phive-thriftserver test:scalastyle >> scalastyle.txt # Check style with YARN built too -echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 scalastyle >> scalastyle.txt -echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 test:scalastyle >> scalastyle.txt +echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 scalastyle >> scalastyle.txt +echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 test:scalastyle >> scalastyle.txt ERRORS=$(cat scalastyle.txt | awk '{if($1~/error/)print}') rm scalastyle.txt diff --git a/docs/building-spark.md b/docs/building-spark.md index 287fcd3c4034f..6e310ff424784 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -59,14 +59,14 @@ You can fix this by setting the `MAVEN_OPTS` variable as discussed before. # Specifying the Hadoop Version -Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default. Note that certain build profiles are required for particular Hadoop versions: +Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 2.2.0 by default. 
Note that certain build profiles are required for particular Hadoop versions: <table class="table"> <thead> <tr><th>Hadoop version</th><th>Profile required</th></tr> </thead> <tbody> - <tr><td>1.x to 2.1.x</td><td>(none)</td></tr> + <tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr> <tr><td>2.2.x</td><td>hadoop-2.2</td></tr> <tr><td>2.3.x</td><td>hadoop-2.3</td></tr> <tr><td>2.4.x</td><td>hadoop-2.4</td></tr> @@ -77,10 +77,10 @@ For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hado {% highlight bash %} # Apache Hadoop 1.2.1 -mvn -Dhadoop.version=1.2.1 -DskipTests clean package +mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package # Cloudera CDH 4.2.0 with MapReduce v1 -mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package +mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package {% endhighlight %} You can enable the "yarn" profile and optionally set the "yarn.version" property if it is different from "hadoop.version". Spark only supports YARN versions 2.2.0 and later. @@ -88,8 +88,9 @@ You can enable the "yarn" profile and optionally set the "yarn.version" property Examples: {% highlight bash %} + # Apache Hadoop 2.2.X -mvn -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package +mvn -Pyarn -Phadoop-2.2 -DskipTests clean package # Apache Hadoop 2.3.X mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md index 96bd69ca3b33b..795dd82a6be06 100644 --- a/docs/hadoop-third-party-distributions.md +++ b/docs/hadoop-third-party-distributions.md @@ -14,7 +14,7 @@ property. For certain versions, you will need to specify additional profiles. Fo see the guide on [building with maven](building-spark.html#specifying-the-hadoop-version): mvn -Dhadoop.version=1.0.4 -DskipTests clean package - mvn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package + mvn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package The table below lists the corresponding `hadoop.version` code for each CDH/HDP release. Note that some Hadoop releases are binary compatible across client versions. This means the pre-built Spark diff --git a/make-distribution.sh b/make-distribution.sh index 1bfa9acb1fe6e..8d6e91d67593f 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -58,7 +58,7 @@ while (( "$#" )); do --hadoop) echo "Error: '--hadoop' is no longer supported:" echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead." - echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4." + echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4." 
exit_with_usage ;; --with-yarn) diff --git a/pom.xml b/pom.xml index 564a443466e5a..91d1d843c762a 100644 --- a/pom.xml +++ b/pom.xml @@ -122,9 +122,9 @@ <slf4j.version>1.7.10</slf4j.version> <log4j.version>1.2.17</log4j.version> <hadoop.version>2.2.0</hadoop.version> - <protobuf.version>2.4.1</protobuf.version> + <protobuf.version>2.5.0</protobuf.version> <yarn.version>${hadoop.version}</yarn.version> - <hbase.version>0.98.7-hadoop1</hbase.version> + <hbase.version>0.98.7-hadoop2</hbase.version> <hbase.artifact>hbase</hbase.artifact> <flume.version>1.4.0</flume.version> <zookeeper.version>3.4.5</zookeeper.version> @@ -143,7 +143,7 @@ <oro.version>2.0.8</oro.version> <codahale.metrics.version>3.1.0</codahale.metrics.version> <avro.version>1.7.7</avro.version> - <avro.mapred.classifier></avro.mapred.classifier> + <avro.mapred.classifier>hadoop2</avro.mapred.classifier> <jets3t.version>0.7.1</jets3t.version> <aws.java.sdk.version>1.8.3</aws.java.sdk.version> <aws.kinesis.client.version>1.1.0</aws.kinesis.client.version> @@ -155,7 +155,7 @@ <jline.version>${scala.version}</jline.version> <jline.groupid>org.scala-lang</jline.groupid> <jodd.version>3.6.3</jodd.version> - <codehaus.jackson.version>1.8.8</codehaus.jackson.version> + <codehaus.jackson.version>1.9.13</codehaus.jackson.version> <fasterxml.jackson.version>2.4.4</fasterxml.jackson.version> <snappy.version>1.1.1.7</snappy.version> <netlib.java.version>1.1.2</netlib.java.version> @@ -1644,26 +1644,27 @@ --> <profile> - <id>hadoop-2.2</id> + <id>hadoop-1</id> <properties> - <hadoop.version>2.2.0</hadoop.version> - <protobuf.version>2.5.0</protobuf.version> - <hbase.version>0.98.7-hadoop2</hbase.version> - <avro.mapred.classifier>hadoop2</avro.mapred.classifier> - <codehaus.jackson.version>1.9.13</codehaus.jackson.version> + <hadoop.version>1.0.4</hadoop.version> + <protobuf.version>2.4.1</protobuf.version> + <hbase.version>0.98.7-hadoop1</hbase.version> + <avro.mapred.classifier>hadoop1</avro.mapred.classifier> + <codehaus.jackson.version>1.8.8</codehaus.jackson.version> </properties> </profile> + <profile> + <id>hadoop-2.2</id> + <!-- SPARK-7249: Default hadoop profile. Uses global properties. --> + </profile> + <profile> <id>hadoop-2.3</id> <properties> <hadoop.version>2.3.0</hadoop.version> - <protobuf.version>2.5.0</protobuf.version> <jets3t.version>0.9.3</jets3t.version> - <hbase.version>0.98.7-hadoop2</hbase.version> <commons.math3.version>3.1.1</commons.math3.version> - <avro.mapred.classifier>hadoop2</avro.mapred.classifier> - <codehaus.jackson.version>1.9.13</codehaus.jackson.version> </properties> </profile> @@ -1671,12 +1672,8 @@ <id>hadoop-2.4</id> <properties> <hadoop.version>2.4.0</hadoop.version> - <protobuf.version>2.5.0</protobuf.version> <jets3t.version>0.9.3</jets3t.version> - <hbase.version>0.98.7-hadoop2</hbase.version> <commons.math3.version>3.1.1</commons.math3.version> - <avro.mapred.classifier>hadoop2</avro.mapred.classifier> - <codehaus.jackson.version>1.9.13</codehaus.jackson.version> </properties> </profile> diff --git a/yarn/pom.xml b/yarn/pom.xml index 7c8c3613e7a05..00d219f836708 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -30,6 +30,7 @@ <name>Spark Project YARN</name> <properties> <sbt.project.name>yarn</sbt.project.name> + <jersey.version>1.9</jersey.version> </properties> <dependencies> @@ -85,7 +86,12 @@ <artifactId>jetty-servlet</artifactId> </dependency> <!-- End of shaded deps. --> - + + <!-- + See SPARK-3710. 
hadoop-yarn-server-tests in Hadoop 2.2 fails to pull some needed + dependencies, so they need to be added manually for the tests to work. + --> + <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-server-tests</artifactId> @@ -97,59 +103,44 @@ <artifactId>mockito-all</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>org.mortbay.jetty</groupId> + <artifactId>jetty</artifactId> + <version>6.1.26</version> + <exclusions> + <exclusion> + <groupId>org.mortbay.jetty</groupId> + <artifactId>servlet-api</artifactId> + </exclusion> + </exclusions> + <scope>test</scope> + </dependency> + <dependency> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-core</artifactId> + <version>${jersey.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-json</artifactId> + <version>${jersey.version}</version> + <scope>test</scope> + <exclusions> + <exclusion> + <groupId>stax</groupId> + <artifactId>stax-api</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-server</artifactId> + <version>${jersey.version}</version> + <scope>test</scope> + </dependency> </dependencies> - - <!-- - See SPARK-3710. hadoop-yarn-server-tests in Hadoop 2.2 fails to pull some needed - dependencies, so they need to be added manually for the tests to work. - --> - <profiles> - <profile> - <id>hadoop-2.2</id> - <properties> - <jersey.version>1.9</jersey.version> - </properties> - <dependencies> - <dependency> - <groupId>org.mortbay.jetty</groupId> - <artifactId>jetty</artifactId> - <version>6.1.26</version> - <exclusions> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - </exclusions> - <scope>test</scope> - </dependency> - <dependency> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-core</artifactId> - <version>${jersey.version}</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-json</artifactId> - <version>${jersey.version}</version> - <scope>test</scope> - <exclusions> - <exclusion> - <groupId>stax</groupId> - <artifactId>stax-api</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-server</artifactId> - <version>${jersey.version}</version> - <scope>test</scope> - </dependency> - </dependencies> - </profile> - </profiles> - + <build> <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory> From f2cd00be350fdba3acfbfdf155701182d1c404fd Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Thu, 14 May 2015 10:25:18 -0700 Subject: [PATCH 170/320] [SQL][minor] rename apply for QueryPlanner A follow-up of https://github.com/apache/spark/pull/5624 Author: Wenchen Fan <cloud0fan@outlook.com> Closes #6142 from cloud-fan/tmp and squashes the following commits: 971a92b [Wenchen Fan] use plan instead of execute 24c5ffe [Wenchen Fan] rename apply --- .../org/apache/spark/sql/catalyst/planning/QueryPlanner.scala | 4 ++-- sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala index 51b5699affed5..73a21884a4710 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala @@ -51,9 +51,9 @@ abstract class QueryPlanner[PhysicalPlan <: TreeNode[PhysicalPlan]] { * filled in automatically by the QueryPlanner using the other execution strategies that are * available. */ - protected def planLater(plan: LogicalPlan) = apply(plan).next() + protected def planLater(plan: LogicalPlan) = this.plan(plan).next() - def apply(plan: LogicalPlan): Iterator[PhysicalPlan] = { + def plan(plan: LogicalPlan): Iterator[PhysicalPlan] = { // Obviously a lot to do here still... val iter = strategies.view.flatMap(_(plan)).toIterator assert(iter.hasNext, s"No plan for $plan") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 521f3dc821795..b33a700208014 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -1321,7 +1321,7 @@ class SQLContext(@transient val sparkContext: SparkContext) // TODO: Don't just pick the first one... lazy val sparkPlan: SparkPlan = { SparkPlan.currentContext.set(self) - planner(optimizedPlan).next() + planner.plan(optimizedPlan).next() } // executedPlan should not be used to initialize any SparkPlan. It should be // only used for execution. From 5d7d4f887d509e6d037d8fc5247d2e5f8a4563c9 Mon Sep 17 00:00:00 2001 From: ksonj <kson@siberie.de> Date: Thu, 14 May 2015 15:10:58 -0700 Subject: [PATCH 171/320] [SPARK-7278] [PySpark] DateType should find datetime.datetime acceptable DateType should not be restricted to `datetime.date` but accept `datetime.datetime` objects as well. Could someone with a little more insight verify this? 
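For illustration, a minimal sketch of the case this change covers (not part of the patch; it assumes an existing `sqlContext` from the 1.x Python API): a `datetime.datetime` value supplied for a `DateType` field should now pass schema verification instead of being rejected.

    import datetime
    from pyspark.sql.types import StructType, StructField, DateType

    schema = StructType([StructField("day", DateType())])
    # datetime.datetime carries a date plus a time-of-day component; with this
    # change it is accepted wherever a DateType value is type-checked, just
    # like a plain datetime.date.
    df = sqlContext.createDataFrame([(datetime.datetime(2015, 5, 14, 12, 30),)], schema)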
Author: ksonj <kson@siberie.de> Closes #6057 from ksonj/dates and squashes the following commits: 68a158e [ksonj] DateType should find datetime.datetime acceptable too --- python/pyspark/sql/_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/_types.py b/python/pyspark/sql/_types.py index b96851a174d49..629c3a94513b8 100644 --- a/python/pyspark/sql/_types.py +++ b/python/pyspark/sql/_types.py @@ -930,7 +930,7 @@ def _infer_schema_type(obj, dataType): DecimalType: (decimal.Decimal,), StringType: (str, unicode), BinaryType: (bytearray,), - DateType: (datetime.date,), + DateType: (datetime.date, datetime.datetime), TimestampType: (datetime.datetime,), ArrayType: (list, tuple, array), MapType: (dict,), From 11a1a135d1fe892cd48a9116acc7554846aed84c Mon Sep 17 00:00:00 2001 From: tedyu <yuzhihong@gmail.com> Date: Thu, 14 May 2015 15:26:35 -0700 Subject: [PATCH 172/320] Make SPARK prefix a variable Author: tedyu <yuzhihong@gmail.com> Closes #6153 from ted-yu/master and squashes the following commits: 4e0bac5 [tedyu] Use JIRA_PROJECT_NAME as variable name ab982aa [tedyu] Make SPARK prefix a variable --- dev/github_jira_sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/github_jira_sync.py b/dev/github_jira_sync.py index 8051080117062..ff1e39664ee04 100755 --- a/dev/github_jira_sync.py +++ b/dev/github_jira_sync.py @@ -33,6 +33,7 @@ # User facing configs GITHUB_API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark") +JIRA_PROJECT_NAME = os.environ.get("JIRA_PROJECT_NAME", "SPARK") JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira") JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "apachespark") JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "XXX") @@ -68,7 +69,7 @@ def get_jira_prs(): page_json = get_json(page) for pull in page_json: - jiras = re.findall("SPARK-[0-9]{4,5}", pull['title']) + jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title']) for jira in jiras: result = result + [(jira, pull)] From 93dbb3ad83fd60444a38c3dc87a2053c667123af Mon Sep 17 00:00:00 2001 From: Rex Xiong <pengx@microsoft.com> Date: Thu, 14 May 2015 16:55:31 -0700 Subject: [PATCH 173/320] [SPARK-7598] [DEPLOY] Add aliveWorkers metrics in Master In Spark Standalone setup, when some workers are DEAD, they will stay in master worker list for a while. master.workers metrics for master is only showing the total number of workers, we need to monitor how many real ALIVE workers are there to ensure the cluster is healthy. 
Author: Rex Xiong <pengx@microsoft.com> Closes #6117 from twilightgod/add-aliveWorker-metrics and squashes the following commits: 6be69a5 [Rex Xiong] Fix comment for aliveWorkers metrics a882f39 [Rex Xiong] Fix style for aliveWorkers metrics 38ce955 [Rex Xiong] Add aliveWorkers metrics in Master --- .../scala/org/apache/spark/deploy/master/MasterSource.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala index 9c3f79f1244b7..66a9ff38678c6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala @@ -30,6 +30,11 @@ private[spark] class MasterSource(val master: Master) extends Source { override def getValue: Int = master.workers.size }) + // Gauge for alive worker numbers in cluster + metricRegistry.register(MetricRegistry.name("aliveWorkers"), new Gauge[Int]{ + override def getValue: Int = master.workers.filter(_.state == WorkerState.ALIVE).size + }) + // Gauge for application numbers in cluster metricRegistry.register(MetricRegistry.name("apps"), new Gauge[Int] { override def getValue: Int = master.apps.size From 57ed16cf9372c109e84bd51b728f2c82940949a7 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 14 May 2015 16:56:32 -0700 Subject: [PATCH 174/320] [SPARK-7643] [UI] use the correct size in RDDPage for storage info and partitions `dataDistribution` and `partitions` are `Option[Seq[_]]`. andrewor14 squito Author: Xiangrui Meng <meng@databricks.com> Closes #6157 from mengxr/SPARK-7643 and squashes the following commits: 99fe8a4 [Xiangrui Meng] use the correct size in RDDPage for storage info and partitions --- .../main/scala/org/apache/spark/ui/storage/RDDPage.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala index 05f94a7507f4f..fbce917a0824d 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala @@ -77,14 +77,17 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") { <div class="row-fluid"> <div class="span12"> - <h4> Data Distribution on {rddStorageInfo.dataDistribution.size} Executors </h4> + <h4> + Data Distribution on {rddStorageInfo.dataDistribution.map(_.size).getOrElse(0)} + Executors + </h4> {workerTable} </div> </div> <div class="row-fluid"> <div class="span12"> - <h4> {rddStorageInfo.partitions.size} Partitions </h4> + <h4> {rddStorageInfo.partitions.map(_.size).getOrElse(0)} Partitions </h4> {blockTable} </div> </div>; From 0a317c124c3a43089cdb8f079345c8f2842238cd Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Thu, 14 May 2015 16:57:33 -0700 Subject: [PATCH 175/320] [SPARK-7649] [STREAMING] [WEBUI] Use window.localStorage to store the status rather than the url Use window.localStorage to store the status rather than the url so that the url won't be changed. 
cc tdas Author: zsxwing <zsxwing@gmail.com> Closes #6158 from zsxwing/SPARK-7649 and squashes the following commits: 3c56fef [zsxwing] Use window.localStorage to store the status rather than the url --- .../apache/spark/ui/static/streaming-page.js | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js index 22b186873e990..0fac658d57842 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js +++ b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js @@ -252,28 +252,16 @@ function drawHistogram(id, values, minY, maxY, unitY, batchInterval) { } $(function() { - function getParameterFromURL(param) - { - var parameters = window.location.search.substring(1); // Remove "?" - var keyValues = parameters.split('&'); - for (var i = 0; i < keyValues.length; i++) - { - var paramKeyValue = keyValues[i].split('='); - if (paramKeyValue[0] == param) - { - return paramKeyValue[1]; - } - } - } - - var status = getParameterFromURL("show-streams-detail") == "true"; + var status = window.localStorage && window.localStorage.getItem("show-streams-detail") == "true"; $("span.expand-input-rate").click(function() { status = !status; $("#inputs-table").toggle('collapsed'); // Toggle the class of the arrow between open and closed $(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed'); - window.history.pushState('', document.title, window.location.pathname + '?show-streams-detail=' + status); + if (window.localStorage) { + window.localStorage.setItem("show-streams-detail", "" + status); + } }); if (status) { From b208f998b5800bdba4ce6651f172c26a8d7d351b Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Thu, 14 May 2015 16:58:36 -0700 Subject: [PATCH 176/320] [SPARK-7645] [STREAMING] [WEBUI] Show milliseconds in the UI if the batch interval < 1 second I also updated the summary of the Streaming page. 
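For context, the millisecond formatting only shows up for applications whose batch interval is below one second; a minimal sketch of such a setup in the Python API (assuming an existing SparkContext `sc`):

    from pyspark.streaming import StreamingContext

    # A 500 ms batch interval, expressed in seconds. With this change the batch
    # times on the streaming page are rendered as HH:mm:ss.SSS rather than
    # being truncated to whole seconds.
    ssc = StreamingContext(sc, batchDuration=0.5)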
![screen shot 2015-05-14 at 11 52 59 am](https://cloud.githubusercontent.com/assets/1000778/7640103/13cdf68e-fa36-11e4-84ec-e2a3954f4319.png) ![screen shot 2015-05-14 at 12 39 33 pm](https://cloud.githubusercontent.com/assets/1000778/7640151/4cc066ac-fa36-11e4-8494-2821d6a6f17c.png) Author: zsxwing <zsxwing@gmail.com> Closes #6154 from zsxwing/SPARK-7645 and squashes the following commits: 5db6ca1 [zsxwing] Add UIUtils.formatBatchTime e4802df [zsxwing] Show milliseconds in the UI if the batch interval < 1 second --- .../apache/spark/ui/static/streaming-page.js | 11 +++- .../spark/streaming/ui/AllBatchesTable.scala | 14 +++-- .../apache/spark/streaming/ui/BatchPage.scala | 5 +- .../spark/streaming/ui/StreamingPage.scala | 10 ++-- .../apache/spark/streaming/ui/UIUtils.scala | 55 ++++++++++++++++++- .../spark/streaming/ui/UIUtilsSuite.scala | 11 ++++ 6 files changed, 94 insertions(+), 12 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js index 0fac658d57842..0ee6752b29e9a 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js +++ b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js @@ -98,7 +98,16 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) { var x = d3.scale.linear().domain([minX, maxX]).range([0, width]); var y = d3.scale.linear().domain([minY, maxY]).range([height, 0]); - var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) { return timeFormat[d]; }); + var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) { + var formattedDate = timeFormat[d]; + var dotIndex = formattedDate.indexOf('.'); + if (dotIndex >= 0) { + // Remove milliseconds + return formattedDate.substring(0, dotIndex); + } else { + return formattedDate; + } + }); var formatYValue = d3.format(",.2f"); var yAxis = d3.svg.axis().scale(y).orient("left").ticks(5).tickFormat(formatYValue); diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala index 3619e129ad9cf..00cc47d6a3ca5 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala @@ -17,11 +17,14 @@ package org.apache.spark.streaming.ui +import java.text.SimpleDateFormat +import java.util.Date + import scala.xml.Node import org.apache.spark.ui.{UIUtils => SparkUIUtils} -private[ui] abstract class BatchTableBase(tableId: String) { +private[ui] abstract class BatchTableBase(tableId: String, batchInterval: Long) { protected def columns: Seq[Node] = { <th>Batch Time</th> @@ -35,7 +38,7 @@ private[ui] abstract class BatchTableBase(tableId: String) { protected def baseRow(batch: BatchUIData): Seq[Node] = { val batchTime = batch.batchTime.milliseconds - val formattedBatchTime = SparkUIUtils.formatDate(batch.batchTime.milliseconds) + val formattedBatchTime = UIUtils.formatBatchTime(batchTime, batchInterval) val eventCount = batch.numRecords val schedulingDelay = batch.schedulingDelay val formattedSchedulingDelay = schedulingDelay.map(SparkUIUtils.formatDuration).getOrElse("-") @@ -79,7 +82,8 @@ private[ui] abstract class BatchTableBase(tableId: String) { private[ui] class ActiveBatchTable( runningBatches: Seq[BatchUIData], - waitingBatches: Seq[BatchUIData]) extends BatchTableBase("active-batches-table") { + waitingBatches: 
Seq[BatchUIData], + batchInterval: Long) extends BatchTableBase("active-batches-table", batchInterval) { override protected def columns: Seq[Node] = super.columns ++ <th>Status</th> @@ -99,8 +103,8 @@ private[ui] class ActiveBatchTable( } } -private[ui] class CompletedBatchTable(batches: Seq[BatchUIData]) - extends BatchTableBase("completed-batches-table") { +private[ui] class CompletedBatchTable(batches: Seq[BatchUIData], batchInterval: Long) + extends BatchTableBase("completed-batches-table", batchInterval) { override protected def columns: Seq[Node] = super.columns ++ <th>Total Delay diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala index 831f60e870f74..f75067669abe5 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala @@ -17,6 +17,8 @@ package org.apache.spark.streaming.ui +import java.text.SimpleDateFormat +import java.util.Date import javax.servlet.http.HttpServletRequest import scala.xml.{NodeSeq, Node, Text} @@ -288,7 +290,8 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { val batchTime = Option(request.getParameter("id")).map(id => Time(id.toLong)).getOrElse { throw new IllegalArgumentException(s"Missing id parameter") } - val formattedBatchTime = SparkUIUtils.formatDate(batchTime.milliseconds) + val formattedBatchTime = + UIUtils.formatBatchTime(batchTime.milliseconds, streamingListener.batchDuration) val batchUIData = streamingListener.getBatchUIData(batchTime).getOrElse { throw new IllegalArgumentException(s"Batch $formattedBatchTime does not exist") diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala index efce8c58fb962..070564aa10633 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala @@ -186,6 +186,8 @@ private[ui] class StreamingPage(parent: StreamingTab) <strong> {SparkUIUtils.formatDate(startTime)} </strong> + (<strong>{listener.numTotalCompletedBatches}</strong> + completed batches, <strong>{listener.numTotalReceivedRecords}</strong> records) </div> <br /> } @@ -199,9 +201,9 @@ private[ui] class StreamingPage(parent: StreamingTab) * @param times all time values that will be used in the graphs. 
*/ private def generateTimeMap(times: Seq[Long]): Seq[Node] = { - val dateFormat = new SimpleDateFormat("HH:mm:ss") val js = "var timeFormat = {};\n" + times.map { time => - val formattedTime = dateFormat.format(new Date(time)) + val formattedTime = + UIUtils.formatBatchTime(time, listener.batchDuration, showYYYYMMSS = false) s"timeFormat[$time] = '$formattedTime';" }.mkString("\n") @@ -472,14 +474,14 @@ private[ui] class StreamingPage(parent: StreamingTab) val activeBatchesContent = { <h4 id="active">Active Batches ({runningBatches.size + waitingBatches.size})</h4> ++ - new ActiveBatchTable(runningBatches, waitingBatches).toNodeSeq + new ActiveBatchTable(runningBatches, waitingBatches, listener.batchDuration).toNodeSeq } val completedBatchesContent = { <h4 id="completed"> Completed Batches (last {completedBatches.size} out of {listener.numTotalCompletedBatches}) </h4> ++ - new CompletedBatchTable(completedBatches).toNodeSeq + new CompletedBatchTable(completedBatches, listener.batchDuration).toNodeSeq } activeBatchesContent ++ completedBatchesContent diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala index f153ee105a18e..86cfb1fa47370 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala @@ -17,6 +17,8 @@ package org.apache.spark.streaming.ui +import java.text.SimpleDateFormat +import java.util.TimeZone import java.util.concurrent.TimeUnit private[streaming] object UIUtils { @@ -62,7 +64,7 @@ private[streaming] object UIUtils { * Convert `milliseconds` to the specified `unit`. We cannot use `TimeUnit.convert` because it * will discard the fractional part. */ - def convertToTimeUnit(milliseconds: Long, unit: TimeUnit): Double = unit match { + def convertToTimeUnit(milliseconds: Long, unit: TimeUnit): Double = unit match { case TimeUnit.NANOSECONDS => milliseconds * 1000 * 1000 case TimeUnit.MICROSECONDS => milliseconds * 1000 case TimeUnit.MILLISECONDS => milliseconds @@ -71,4 +73,55 @@ private[streaming] object UIUtils { case TimeUnit.HOURS => milliseconds / 1000.0 / 60.0 / 60.0 case TimeUnit.DAYS => milliseconds / 1000.0 / 60.0 / 60.0 / 24.0 } + + // SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use. + private val batchTimeFormat = new ThreadLocal[SimpleDateFormat]() { + override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") + } + + private val batchTimeFormatWithMilliseconds = new ThreadLocal[SimpleDateFormat]() { + override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS") + } + + /** + * If `batchInterval` is less than 1 second, format `batchTime` with milliseconds. Otherwise, + * format `batchTime` without milliseconds. + * + * @param batchTime the batch time to be formatted + * @param batchInterval the batch interval + * @param showYYYYMMSS if showing the `yyyy/MM/dd` part. 
If it's false, the return value wll be + * only `HH:mm:ss` or `HH:mm:ss.SSS` depending on `batchInterval` + * @param timezone only for test + */ + def formatBatchTime( + batchTime: Long, + batchInterval: Long, + showYYYYMMSS: Boolean = true, + timezone: TimeZone = null): String = { + val oldTimezones = + (batchTimeFormat.get.getTimeZone, batchTimeFormatWithMilliseconds.get.getTimeZone) + if (timezone != null) { + batchTimeFormat.get.setTimeZone(timezone) + batchTimeFormatWithMilliseconds.get.setTimeZone(timezone) + } + try { + val formattedBatchTime = + if (batchInterval < 1000) { + batchTimeFormatWithMilliseconds.get.format(batchTime) + } else { + // If batchInterval >= 1 second, don't show milliseconds + batchTimeFormat.get.format(batchTime) + } + if (showYYYYMMSS) { + formattedBatchTime + } else { + formattedBatchTime.substring(formattedBatchTime.indexOf(' ') + 1) + } + } finally { + if (timezone != null) { + batchTimeFormat.get.setTimeZone(oldTimezones._1) + batchTimeFormatWithMilliseconds.get.setTimeZone(oldTimezones._2) + } + } + } } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala index 6df1a63ab2e37..e9ab917ab845c 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.streaming.ui +import java.util.TimeZone import java.util.concurrent.TimeUnit import org.scalatest.FunSuite @@ -64,4 +65,14 @@ class UIUtilsSuite extends FunSuite with Matchers{ val convertedTime = UIUtils.convertToTimeUnit(milliseconds, unit) convertedTime should be (expectedTime +- 1E-6) } + + test("formatBatchTime") { + val tzForTest = TimeZone.getTimeZone("America/Los_Angeles") + val batchTime = 1431637480452L // Thu May 14 14:04:40 PDT 2015 + assert("2015/05/14 14:04:40" === UIUtils.formatBatchTime(batchTime, 1000, timezone = tzForTest)) + assert("2015/05/14 14:04:40.452" === + UIUtils.formatBatchTime(batchTime, 999, timezone = tzForTest)) + assert("14:04:40" === UIUtils.formatBatchTime(batchTime, 1000, false, timezone = tzForTest)) + assert("14:04:40.452" === UIUtils.formatBatchTime(batchTime, 999, false, timezone = tzForTest)) + } } From 723853edab18d28515af22097b76e4e6574b228e Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 14 May 2015 18:13:58 -0700 Subject: [PATCH 177/320] [SPARK-7648] [MLLIB] Add weights and intercept to GLM wrappers in spark.ml Otherwise, users can only use `transform` on the models. 
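A short sketch of what this exposes on the Python side (not part of the patch; it assumes an existing `sqlContext` and mirrors the doctests added below):

    from pyspark.ml.classification import LogisticRegression
    from pyspark.mllib.linalg import Vectors

    df = sqlContext.createDataFrame(
        [(1.0, Vectors.dense(1.0)), (0.0, Vectors.sparse(1, [], []))],
        ["label", "features"])
    model = LogisticRegression(maxIter=5, regParam=0.01).fit(df)
    # Previously only model.transform(...) was reachable from Python; the
    # wrapper now also surfaces the fitted coefficients and intercept.
    print(model.weights, model.intercept)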
brkyvz Author: Xiangrui Meng <meng@databricks.com> Closes #6156 from mengxr/SPARK-7647 and squashes the following commits: 1ae3d2d [Xiangrui Meng] add weights and intercept to LogisticRegression in Python f49eb46 [Xiangrui Meng] add weights and intercept to LinearRegressionModel --- python/pyspark/ml/classification.py | 18 ++++++++++++++++++ python/pyspark/ml/regression.py | 18 ++++++++++++++++++ python/pyspark/ml/wrapper.py | 8 +++++++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 96d29058a3781..8c9a55e79abad 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -43,6 +43,10 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF() >>> model.transform(test0).head().prediction 0.0 + >>> model.weights + DenseVector([5.5...]) + >>> model.intercept + -2.68... >>> test1 = sc.parallelize([Row(features=Vectors.sparse(1, [0], [1.0]))]).toDF() >>> model.transform(test1).head().prediction 1.0 @@ -148,6 +152,20 @@ class LogisticRegressionModel(JavaModel): Model fitted by LogisticRegression. """ + @property + def weights(self): + """ + Model weights. + """ + return self._call_java("weights") + + @property + def intercept(self): + """ + Model intercept. + """ + return self._call_java("intercept") + class TreeClassifierParams(object): """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 0ab5c6c3d20c3..2803864ff4a17 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -51,6 +51,10 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) >>> model.transform(test0).head().prediction -1.0 + >>> model.weights + DenseVector([1.0]) + >>> model.intercept + 0.0 >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) >>> model.transform(test1).head().prediction 1.0 @@ -117,6 +121,20 @@ class LinearRegressionModel(JavaModel): Model fitted by LinearRegression. """ + @property + def weights(self): + """ + Model weights. + """ + return self._call_java("weights") + + @property + def intercept(self): + """ + Model intercept. 
+ """ + return self._call_java("intercept") + class TreeRegressorParams(object): """ diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index f5ac2a398642a..dda6c6aba3049 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -21,7 +21,7 @@ from pyspark.sql import DataFrame from pyspark.ml.param import Params from pyspark.ml.pipeline import Estimator, Transformer, Evaluator, Model -from pyspark.mllib.common import inherit_doc +from pyspark.mllib.common import inherit_doc, _java2py, _py2java def _jvm(): @@ -149,6 +149,12 @@ def __init__(self, java_model): def _java_obj(self): return self._java_model + def _call_java(self, name, *args): + m = getattr(self._java_model, name) + sc = SparkContext._active_spark_context + java_args = [_py2java(sc, arg) for arg in args] + return _java2py(sc, m(*java_args)) + @inherit_doc class JavaEvaluator(Evaluator, JavaWrapper): From 48fc38f5844f6c12bf440f2990b6d7f1630fafac Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 14 May 2015 18:16:22 -0700 Subject: [PATCH 178/320] [SPARK-7619] [PYTHON] fix docstring signature Just realized that we need `\` at the end of the docstring. brkyvz Author: Xiangrui Meng <meng@databricks.com> Closes #6161 from mengxr/SPARK-7619 and squashes the following commits: e44495f [Xiangrui Meng] fix docstring signature --- python/docs/pyspark.ml.rst | 14 +++++------ python/pyspark/ml/classification.py | 39 ++++++++++++++--------------- python/pyspark/ml/feature.py | 8 +++--- python/pyspark/ml/recommendation.py | 8 +++--- python/pyspark/ml/regression.py | 38 +++++++++++++--------------- 5 files changed, 52 insertions(+), 55 deletions(-) diff --git a/python/docs/pyspark.ml.rst b/python/docs/pyspark.ml.rst index 8379b8fc8a1e1..518b8e774dd5f 100644 --- a/python/docs/pyspark.ml.rst +++ b/python/docs/pyspark.ml.rst @@ -1,8 +1,8 @@ pyspark.ml package -===================== +================== ML Pipeline APIs --------------- +---------------- .. automodule:: pyspark.ml :members: @@ -10,7 +10,7 @@ ML Pipeline APIs :inherited-members: pyspark.ml.param module -------------------------- +----------------------- .. automodule:: pyspark.ml.param :members: @@ -34,7 +34,7 @@ pyspark.ml.classification module :inherited-members: pyspark.ml.recommendation module -------------------------- +-------------------------------- .. automodule:: pyspark.ml.recommendation :members: @@ -42,7 +42,7 @@ pyspark.ml.recommendation module :inherited-members: pyspark.ml.regression module -------------------------- +---------------------------- .. automodule:: pyspark.ml.regression :members: @@ -50,7 +50,7 @@ pyspark.ml.regression module :inherited-members: pyspark.ml.tuning module --------------------------------- +------------------------ .. automodule:: pyspark.ml.tuning :members: @@ -58,7 +58,7 @@ pyspark.ml.tuning module :inherited-members: pyspark.ml.evaluation module --------------------------------- +---------------------------- .. 
automodule:: pyspark.ml.evaluation :members: diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 8c9a55e79abad..1411d3fd9c56e 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -71,7 +71,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred threshold=0.5, probabilityCol="probability"): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ threshold=0.5, probabilityCol="probability") """ super(LogisticRegression, self).__init__() @@ -96,8 +96,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, threshold=0.5, probabilityCol="probability"): """ - setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ threshold=0.5, probabilityCol="probability") Sets params for logistic regression. """ @@ -220,7 +220,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini"): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini") """ super(DecisionTreeClassifier, self).__init__() @@ -242,9 +242,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre impurity="gini"): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="gini") + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini") Sets params for the DecisionTreeClassifier. 
""" kwargs = self.setParams._input_kwargs @@ -320,9 +319,9 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", numTrees=20, featureSubsetStrategy="auto", seed=42): """ - __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \ numTrees=20, featureSubsetStrategy="auto", seed=42) """ super(RandomForestClassifier, self).__init__() @@ -355,9 +354,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, impurity="gini", numTrees=20, featureSubsetStrategy="auto"): """ - setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, \ impurity="gini", numTrees=20, featureSubsetStrategy="auto") Sets params for linear classification. """ @@ -471,10 +470,10 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", maxIter=20, stepSize=0.1): """ - __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", - maxIter=20, stepSize=0.1) + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ + lossType="logistic", maxIter=20, stepSize=0.1) """ super(GBTClassifier, self).__init__() #: param for Loss function which GBT tries to minimize (case-insensitive). @@ -502,9 +501,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", maxIter=20, stepSize=0.1): """ - setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ lossType="logistic", maxIter=20, stepSize=0.1) Sets params for Gradient Boosted Tree Classification. 
""" diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 30e1fd4922d0a..58e22190c7c3c 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -481,7 +481,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", inputCol=None, outputCol=None): """ - __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \ inputCol=None, outputCol=None) """ super(RegexTokenizer, self).__init__() @@ -496,7 +496,7 @@ def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+" def setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", inputCol=None, outputCol=None): """ - setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", + setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \ inputCol="input", outputCol="output") Sets params for this RegexTokenizer. """ @@ -869,7 +869,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, inputCol=None, outputCol=None): """ - __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \ seed=42, inputCol=None, outputCol=None) """ super(Word2Vec, self).__init__() @@ -889,7 +889,7 @@ def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, inputCol=None, outputCol=None): """ - setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, + setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, \ inputCol=None, outputCol=None) Sets params for this Word2Vec. 
""" diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index 4846b907e85ec..b2439cbd96522 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -92,8 +92,8 @@ def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemB implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, ratingCol="rating", nonnegative=False, checkpointInterval=10): """ - __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, - implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0, + __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \ + implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0, \ ratingCol="rating", nonnegative=false, checkpointInterval=10) """ super(ALS, self).__init__() @@ -118,8 +118,8 @@ def setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItem implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, ratingCol="rating", nonnegative=False, checkpointInterval=10): """ - setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, - implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, + setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \ + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, \ ratingCol="rating", nonnegative=False, checkpointInterval=10) Sets params for ALS. """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 2803864ff4a17..ef77e19327188 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -33,8 +33,7 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction Linear regression. The learning objective is to minimize the squared error, with regularization. - The specific squared error loss function used is: - L = 1/2n ||A weights - y||^2^ + The specific squared error loss function used is: L = 1/2n ||A weights - y||^2^ This support multiple types of regularization: - none (a.k.a. ordinary least squares) @@ -191,7 +190,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance"): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance") """ super(DecisionTreeRegressor, self).__init__() @@ -213,9 +212,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre impurity="variance"): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, - impurity="variance") + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance") Sets params for the DecisionTreeRegressor. 
""" kwargs = self.setParams._input_kwargs @@ -286,10 +284,10 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance", numTrees=20, featureSubsetStrategy="auto", seed=42): """ - __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance", - numTrees=20, featureSubsetStrategy="auto", seed=42) + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ + impurity="variance", numTrees=20, featureSubsetStrategy="auto", seed=42) """ super(RandomForestRegressor, self).__init__() #: param for Criterion used for information gain calculation (case-insensitive). @@ -321,9 +319,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, impurity="variance", numTrees=20, featureSubsetStrategy="auto"): """ - setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, \ impurity="variance", numTrees=20, featureSubsetStrategy="auto") Sets params for linear regression. """ @@ -432,10 +430,10 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1): """ - __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="squared", - maxIter=20, stepSize=0.1) + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ + lossType="squared", maxIter=20, stepSize=0.1) """ super(GBTRegressor, self).__init__() #: param for Loss function which GBT tries to minimize (case-insensitive). @@ -463,9 +461,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1): """ - setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ lossType="squared", maxIter=20, stepSize=0.1) Sets params for Gradient Boosted Tree Regression. 
""" From 6d0633e3ec9518278fcc7eba58549d4ad3d5813f Mon Sep 17 00:00:00 2001 From: Michael Armbrust <michael@databricks.com> Date: Thu, 14 May 2015 19:49:44 -0700 Subject: [PATCH 179/320] [SPARK-7548] [SQL] Add explode function for DataFrames Add an `explode` function for dataframes and modify the analyzer so that single table generating functions can be present in a select clause along with other expressions. There are currently the following restrictions: - only top level TGFs are allowed (i.e. no `select(explode('list) + 1)`) - only one may be present in a single select to avoid potentially confusing implicit Cartesian products. TODO: - [ ] Python Author: Michael Armbrust <michael@databricks.com> Closes #6107 from marmbrus/explodeFunction and squashes the following commits: 7ee2c87 [Michael Armbrust] whitespace 6f80ba3 [Michael Armbrust] Update dataframe.py c176c89 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into explodeFunction 81b5da3 [Michael Armbrust] style d3faa05 [Michael Armbrust] fix self join case f9e1e3e [Michael Armbrust] fix python, add since 4f0d0a9 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into explodeFunction e710fe4 [Michael Armbrust] add java and python 52ca0dc [Michael Armbrust] [SPARK-7548][SQL] Add explode function for dataframes. --- python/pyspark/sql/dataframe.py | 12 +- python/pyspark/sql/functions.py | 20 +++ python/pyspark/sql/tests.py | 15 +++ .../sql/catalyst/analysis/Analyzer.scala | 117 +++++++++++------- .../plans/logical/basicOperators.scala | 3 + .../sql/catalyst/analysis/AnalysisSuite.scala | 10 +- .../scala/org/apache/spark/sql/Column.scala | 27 +++- .../org/apache/spark/sql/DataFrame.scala | 5 +- .../org/apache/spark/sql/functions.scala | 5 + .../spark/sql/ColumnExpressionSuite.scala | 60 +++++++++ 10 files changed, 223 insertions(+), 51 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 82cb1c2fdbf94..2ed95ac8e2505 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1511,13 +1511,19 @@ def inSet(self, *cols): isNull = _unary_op("isNull", "True if the current expression is null.") isNotNull = _unary_op("isNotNull", "True if the current expression is not null.") - def alias(self, alias): - """Return a alias for this column + def alias(self, *alias): + """Returns this column aliased with a new name or names (in the case of expressions that + return more than one column, such as explode). >>> df.select(df.age.alias("age2")).collect() [Row(age2=2), Row(age2=5)] """ - return Column(getattr(self._jc, "as")(alias)) + + if len(alias) == 1: + return Column(getattr(self._jc, "as")(alias[0])) + else: + sc = SparkContext._active_spark_context + return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias)))) @ignore_unicode_prefix def cast(self, dataType): diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index d91265ee0bec8..6cd6974b0e5bb 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -169,6 +169,26 @@ def approxCountDistinct(col, rsd=None): return Column(jc) +def explode(col): + """Returns a new row for each element in the given array or map. 
+ + >>> from pyspark.sql import Row + >>> eDF = sqlContext.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})]) + >>> eDF.select(explode(eDF.intlist).alias("anInt")).collect() + [Row(anInt=1), Row(anInt=2), Row(anInt=3)] + + >>> eDF.select(explode(eDF.mapfield).alias("key", "value")).show() + +---+-----+ + |key|value| + +---+-----+ + | a| b| + +---+-----+ + """ + sc = SparkContext._active_spark_context + jc = sc._jvm.functions.explode(_to_java_column(col)) + return Column(jc) + + def coalesce(*cols): """Returns the first column that is not null. diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 1922d03af61da..d37c5dbed7f6b 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -117,6 +117,21 @@ def tearDownClass(cls): ReusedPySparkTestCase.tearDownClass() shutil.rmtree(cls.tempdir.name, ignore_errors=True) + def test_explode(self): + from pyspark.sql.functions import explode + d = [Row(a=1, intlist=[1, 2, 3], mapfield={"a": "b"})] + rdd = self.sc.parallelize(d) + data = self.sqlCtx.createDataFrame(rdd) + + result = data.select(explode(data.intlist).alias("a")).select("a").collect() + self.assertEqual(result[0][0], 1) + self.assertEqual(result[1][0], 2) + self.assertEqual(result[2][0], 3) + + result = data.select(explode(data.mapfield).alias("a", "b")).select("a", "b").collect() + self.assertEqual(result[0][0], "a") + self.assertEqual(result[0][1], "b") + def test_udf_with_callable(self): d = [Row(number=i, squared=i**2) for i in range(10)] rdd = self.sc.parallelize(d) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4baeeb5b58c2d..0b6e1d44b9c4d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -73,7 +73,6 @@ class Analyzer( ResolveGroupingAnalytics :: ResolveSortReferences :: ResolveGenerate :: - ImplicitGenerate :: ResolveFunctions :: ExtractWindowExpressions :: GlobalAggregates :: @@ -323,6 +322,11 @@ class Analyzer( if findAliases(aggregateExpressions).intersect(conflictingAttributes).nonEmpty => (oldVersion, oldVersion.copy(aggregateExpressions = newAliases(aggregateExpressions))) + case oldVersion: Generate + if oldVersion.generatedSet.intersect(conflictingAttributes).nonEmpty => + val newOutput = oldVersion.generatorOutput.map(_.newInstance()) + (oldVersion, oldVersion.copy(generatorOutput = newOutput)) + case oldVersion @ Window(_, windowExpressions, _, child) if AttributeSet(windowExpressions.map(_.toAttribute)).intersect(conflictingAttributes) .nonEmpty => @@ -521,66 +525,89 @@ class Analyzer( } /** - * When a SELECT clause has only a single expression and that expression is a - * [[catalyst.expressions.Generator Generator]] we convert the - * [[catalyst.plans.logical.Project Project]] to a [[catalyst.plans.logical.Generate Generate]]. + * Rewrites table generating expressions that either need one or more of the following in order + * to be resolved: + * - concrete attribute references for their output. + * - to be relocated from a SELECT clause (i.e. from a [[Project]]) into a [[Generate]]). + * + * Names for the output [[Attributes]] are extracted from [[Alias]] or [[MultiAlias]] expressions + * that wrap the [[Generator]]. If more than one [[Generator]] is found in a Project, an + * [[AnalysisException]] is throw. 
*/ - object ImplicitGenerate extends Rule[LogicalPlan] { + object ResolveGenerate extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case Project(Seq(Alias(g: Generator, name)), child) => - Generate(g, join = false, outer = false, - qualifier = None, UnresolvedAttribute(name) :: Nil, child) - case Project(Seq(MultiAlias(g: Generator, names)), child) => - Generate(g, join = false, outer = false, - qualifier = None, names.map(UnresolvedAttribute(_)), child) + case p: Generate if !p.child.resolved || !p.generator.resolved => p + case g: Generate if g.resolved == false => + g.copy( + generatorOutput = makeGeneratorOutput(g.generator, g.generatorOutput.map(_.name))) + + case p @ Project(projectList, child) => + // Holds the resolved generator, if one exists in the project list. + var resolvedGenerator: Generate = null + + val newProjectList = projectList.flatMap { + case AliasedGenerator(generator, names) if generator.childrenResolved => + if (resolvedGenerator != null) { + failAnalysis( + s"Only one generator allowed per select but ${resolvedGenerator.nodeName} and " + + s"and ${generator.nodeName} found.") + } + + resolvedGenerator = + Generate( + generator, + join = projectList.size > 1, // Only join if there are other expressions in SELECT. + outer = false, + qualifier = None, + generatorOutput = makeGeneratorOutput(generator, names), + child) + + resolvedGenerator.generatorOutput + case other => other :: Nil + } + + if (resolvedGenerator != null) { + Project(newProjectList, resolvedGenerator) + } else { + p + } } - } - /** - * Resolve the Generate, if the output names specified, we will take them, otherwise - * we will try to provide the default names, which follow the same rule with Hive. - */ - object ResolveGenerate extends Rule[LogicalPlan] { - // Construct the output attributes for the generator, - // The output attribute names can be either specified or - // auto generated. + /** Extracts a [[Generator]] expression and any names assigned by aliases to their output. */ + private object AliasedGenerator { + def unapply(e: Expression): Option[(Generator, Seq[String])] = e match { + case Alias(g: Generator, name) => Some((g, name :: Nil)) + case MultiAlias(g: Generator, names) => Some(g, names) + case _ => None + } + } + + /** + * Construct the output attributes for a [[Generator]], given a list of names. If the list of + * names is empty names are assigned by ordinal (i.e., _c0, _c1, ...) to match Hive's defaults. 
+ */ private def makeGeneratorOutput( generator: Generator, - generatorOutput: Seq[Attribute]): Seq[Attribute] = { + names: Seq[String]): Seq[Attribute] = { val elementTypes = generator.elementTypes - if (generatorOutput.length == elementTypes.length) { - generatorOutput.zip(elementTypes).map { - case (a, (t, nullable)) if !a.resolved => - AttributeReference(a.name, t, nullable)() - case (a, _) => a + if (names.length == elementTypes.length) { + names.zip(elementTypes).map { + case (name, (t, nullable)) => + AttributeReference(name, t, nullable)() } - } else if (generatorOutput.length == 0) { + } else if (names.isEmpty) { elementTypes.zipWithIndex.map { // keep the default column names as Hive does _c0, _c1, _cN case ((t, nullable), i) => AttributeReference(s"_c$i", t, nullable)() } } else { - throw new AnalysisException( - s""" - |The number of aliases supplied in the AS clause does not match - |the number of columns output by the UDTF expected - |${elementTypes.size} aliases but got ${generatorOutput.size} - """.stripMargin) + failAnalysis( + "The number of aliases supplied in the AS clause does not match the number of columns " + + s"output by the UDTF expected ${elementTypes.size} aliases but got " + + s"${names.mkString(",")} ") } } - - def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case p: Generate if !p.child.resolved || !p.generator.resolved => p - case p: Generate if p.resolved == false => - // if the generator output names are not specified, we will use the default ones. - Generate( - p.generator, - join = p.join, - outer = p.outer, - p.qualifier, - makeGeneratorOutput(p.generator, p.generatorOutput), p.child) - } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala index 0f349f9d11415..01f4b6e9bb77d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala @@ -59,6 +59,9 @@ case class Generate( child: LogicalPlan) extends UnaryNode { + /** The set of all attributes produced by this node. 
*/ + def generatedSet: AttributeSet = AttributeSet(generatorOutput) + override lazy val resolved: Boolean = { generator.resolved && childrenResolved && diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 6f2f35564d12e..e1d6ac462fbcc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -72,6 +72,9 @@ class AnalysisSuite extends FunSuite with BeforeAndAfter { StructField("cField", StringType) :: Nil ))()) + val listRelation = LocalRelation( + AttributeReference("list", ArrayType(IntegerType))()) + before { caseSensitiveCatalog.registerTable(Seq("TaBlE"), testRelation) caseInsensitiveCatalog.registerTable(Seq("TaBlE"), testRelation) @@ -159,10 +162,15 @@ class AnalysisSuite extends FunSuite with BeforeAndAfter { } } - errorMessages.foreach(m => assert(error.getMessage contains m)) + errorMessages.foreach(m => assert(error.getMessage.toLowerCase contains m.toLowerCase)) } } + errorTest( + "too many generators", + listRelation.select(Explode('list).as('a), Explode('list).as('b)), + "only one generator" :: "explode" :: Nil) + errorTest( "unresolved attributes", testRelation.select('abcd), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 8bf1320ccb71d..dc0aeea7c4aea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -18,12 +18,13 @@ package org.apache.spark.sql import scala.language.implicitConversions +import scala.collection.JavaConversions._ import org.apache.spark.annotation.Experimental import org.apache.spark.Logging import org.apache.spark.sql.functions.lit import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedStar, UnresolvedExtractValue} +import org.apache.spark.sql.catalyst.analysis.{MultiAlias, UnresolvedAttribute, UnresolvedStar, UnresolvedExtractValue} import org.apache.spark.sql.types._ @@ -727,6 +728,30 @@ class Column(protected[sql] val expr: Expression) extends Logging { */ def as(alias: String): Column = Alias(expr, alias)() + /** + * (Scala-specific) Assigns the given aliases to the results of a table generating function. + * {{{ + * // Renames colA to colB in select output. + * df.select(explode($"myMap").as("key" :: "value" :: Nil)) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def as(aliases: Seq[String]): Column = MultiAlias(expr, aliases) + + /** + * Assigns the given aliases to the results of a table generating function. + * {{{ + * // Renames colA to colB in select output. + * df.select(explode($"myMap").as("key" :: "value" :: Nil)) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def as(aliases: Array[String]): Column = MultiAlias(expr, aliases) + /** * Gives the column an alias. 
* {{{ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 4fd5105c27443..2e20c3d3f4ed2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -34,7 +34,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.python.SerDeUtil import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.analysis.{ResolvedStar, UnresolvedAttribute, UnresolvedRelation} +import org.apache.spark.sql.catalyst.analysis.{MultiAlias, ResolvedStar, UnresolvedAttribute, UnresolvedRelation} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, _} import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} @@ -593,6 +593,9 @@ class DataFrame private[sql]( def select(cols: Column*): DataFrame = { val namedExpressions = cols.map { case Column(expr: NamedExpression) => expr + // Leave an unaliased explode with an empty list of names since the analzyer will generate the + // correct defaults after the nested expression's type has been resolved. + case Column(explode: Explode) => MultiAlias(explode, Nil) case Column(expr: Expression) => Alias(expr, expr.prettyString)() } // When user continuously call `select`, speed up analysis by collapsing `Project` diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 4404ad8ad63a8..6640631cf0719 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -363,6 +363,11 @@ object functions { @scala.annotation.varargs def coalesce(e: Column*): Column = Coalesce(e.map(_.expr)) + /** + * Creates a new row for each element in the given array or map column. + */ + def explode(e: Column): Column = Explode(e.expr) + /** * Converts a string exprsesion to lower case. 
* diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 269e185543059..9bdf201b3be7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -27,6 +27,66 @@ import org.apache.spark.sql.types._ class ColumnExpressionSuite extends QueryTest { import org.apache.spark.sql.TestData._ + test("single explode") { + val df = Seq((1, Seq(1,2,3))).toDF("a", "intList") + checkAnswer( + df.select(explode('intList)), + Row(1) :: Row(2) :: Row(3) :: Nil) + } + + test("explode and other columns") { + val df = Seq((1, Seq(1,2,3))).toDF("a", "intList") + + checkAnswer( + df.select($"a", explode('intList)), + Row(1, 1) :: + Row(1, 2) :: + Row(1, 3) :: Nil) + + checkAnswer( + df.select($"*", explode('intList)), + Row(1, Seq(1,2,3), 1) :: + Row(1, Seq(1,2,3), 2) :: + Row(1, Seq(1,2,3), 3) :: Nil) + } + + test("aliased explode") { + val df = Seq((1, Seq(1,2,3))).toDF("a", "intList") + + checkAnswer( + df.select(explode('intList).as('int)).select('int), + Row(1) :: Row(2) :: Row(3) :: Nil) + + checkAnswer( + df.select(explode('intList).as('int)).select(sum('int)), + Row(6) :: Nil) + } + + test("explode on map") { + val df = Seq((1, Map("a" -> "b"))).toDF("a", "map") + + checkAnswer( + df.select(explode('map)), + Row("a", "b")) + } + + test("explode on map with aliases") { + val df = Seq((1, Map("a" -> "b"))).toDF("a", "map") + + checkAnswer( + df.select(explode('map).as("key1" :: "value1" :: Nil)).select("key1", "value1"), + Row("a", "b")) + } + + test("self join explode") { + val df = Seq((1, Seq(1,2,3))).toDF("a", "intList") + val exploded = df.select(explode('intList).as('i)) + + checkAnswer( + exploded.join(exploded, exploded("i") === exploded("i")).agg(count("*")), + Row(3) :: Nil) + } + test("collect on column produced by a binary operator") { val df = Seq((1, 2, 3)).toDF("a", "b", "c") checkAnswer(df.select(df("a") + df("b")), Seq(Row(3))) From f9705d461350c6fccf8022e933ea909f40c53576 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Thu, 14 May 2015 20:49:21 -0700 Subject: [PATCH 180/320] [SPARK-7098][SQL] Make the WHERE clause with timestamp show consistent result JIRA: https://issues.apache.org/jira/browse/SPARK-7098 The WHERE clause with timstamp shows inconsistent results. This pr fixes it. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #5682 from viirya/consistent_timestamp and squashes the following commits: 171445a [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into consistent_timestamp 4e98520 [Liang-Chi Hsieh] Make the WHERE clause with timestamp show consistent result. 
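To make the behavioral change concrete, here is a minimal sketch (illustrative only; it assumes the `timestamps` temp table registered in the TestData change below, and the usual `TestSQLContext` available as `sqlContext`):

```scala
// Prior to this change, a StringType vs. TimestampType comparison was coerced by
// casting the timestamp side to a string, so a literal such as '1969-12-31 16:00:00.0'
// could fail to match purely because of formatting differences (e.g. the trailing ".0").
sqlContext.sql(
  "SELECT time FROM timestamps WHERE time = '1969-12-31 16:00:00.0'").collect()

// An explicit cast has always compared actual timestamp values.
sqlContext.sql(
  "SELECT time FROM timestamps WHERE time = CAST('1969-12-31 16:00:00.0' AS TIMESTAMP)").collect()

// With this change the string side is cast to TimestampType in both the binary
// comparison and the IN cases, so the two queries above return the same rows.
```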
--- .../spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 6 +++--- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++++ sql/core/src/test/scala/org/apache/spark/sql/TestData.scala | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 168a4e30eab86..fe0d3f29977c3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -251,10 +251,10 @@ trait HiveTypeCoercion { p.makeCopy(Array(Cast(p.left, StringType), p.right)) case p: BinaryComparison if p.left.dataType == StringType && p.right.dataType == TimestampType => - p.makeCopy(Array(p.left, Cast(p.right, StringType))) + p.makeCopy(Array(Cast(p.left, TimestampType), p.right)) case p: BinaryComparison if p.left.dataType == TimestampType && p.right.dataType == StringType => - p.makeCopy(Array(Cast(p.left, StringType), p.right)) + p.makeCopy(Array(p.left, Cast(p.right, TimestampType))) case p: BinaryComparison if p.left.dataType == TimestampType && p.right.dataType == DateType => p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType))) @@ -274,7 +274,7 @@ trait HiveTypeCoercion { i.makeCopy(Array(Cast(a, StringType), b)) case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == StringType) => - i.makeCopy(Array(Cast(a, StringType), b)) + i.makeCopy(Array(a, b.map(Cast(_, TimestampType)))) case i @ In(a, b) if a.dataType == DateType && b.forall(_.dataType == TimestampType) => i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType)))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 8cdbe076cbd85..479ad9fe621d0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -297,6 +297,10 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("SPARK-3173 Timestamp support in the parser") { + checkAnswer(sql( + "SELECT time FROM timestamps WHERE time='1969-12-31 16:00:00.0'"), + Row(java.sql.Timestamp.valueOf("1969-12-31 16:00:00"))) + checkAnswer(sql( "SELECT time FROM timestamps WHERE time=CAST('1969-12-31 16:00:00.001' AS TIMESTAMP)"), Row(java.sql.Timestamp.valueOf("1969-12-31 16:00:00.001"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala index 446771ab2a5a5..8fbc2d23d47e6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala @@ -175,7 +175,7 @@ object TestData { "4, D4, true, 2147483644" :: Nil) case class TimestampField(time: Timestamp) - val timestamps = TestSQLContext.sparkContext.parallelize((1 to 3).map { i => + val timestamps = TestSQLContext.sparkContext.parallelize((0 to 3).map { i => TimestampField(new Timestamp(i)) }) timestamps.toDF().registerTempTable("timestamps") From e8f0e016eaf80a363796dd0a094291dcb3b35793 Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Fri, 15 May 2015 12:04:26 +0800 Subject: [PATCH 181/320] [SQL] When creating partitioned table scan, explicitly create UnionRDD. 
Otherwise, it will cause stack overflow when there are many partitions. Author: Yin Huai <yhuai@databricks.com> Closes #6162 from yhuai/partitionUnionedRDD and squashes the following commits: fa016d8 [Yin Huai] Explicitly create UnionRDD. --- .../apache/spark/sql/sources/DataSourceStrategy.scala | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala index a5410cda0fe6b..ee099ab9593c7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala @@ -21,7 +21,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.Logging import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{UnionRDD, RDD} import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ @@ -169,9 +169,12 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { scan.execute() } - val unionedRows = perPartitionRows.reduceOption(_ ++ _).getOrElse { - relation.sqlContext.emptyResult - } + val unionedRows = + if (perPartitionRows.length == 0) { + relation.sqlContext.emptyResult + } else { + new UnionRDD(relation.sqlContext.sparkContext, perPartitionRows) + } createPhysicalRDD(logicalRelation.relation, output, unionedRows) } From 7da33ce5057ff965eec19ce662465b64a3564019 Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Thu, 14 May 2015 23:17:41 -0700 Subject: [PATCH 182/320] [HOTFIX] Add workaround for SPARK-7660 to fix JavaAPISuite failures. --- .../spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java index 730d265c87f88..78e52643531e0 100644 --- a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java @@ -35,6 +35,7 @@ import org.mockito.MockitoAnnotations; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; +import org.xerial.snappy.buffer.CachedBufferAllocator; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.lessThan; @@ -96,6 +97,13 @@ public OutputStream apply(OutputStream stream) { @After public void tearDown() { Utils.deleteRecursively(tempDir); + // This call is a workaround for SPARK-7660, a snappy-java bug which is exposed by this test + // suite. Clearing the cached buffer allocator's pool of reusable buffers masks this bug, + // preventing a test failure in JavaAPISuite that would otherwise occur. The underlying bug + // needs to be fixed, but in the meantime this workaround avoids spurious Jenkins failures. 
+ synchronized (CachedBufferAllocator.class) { + CachedBufferAllocator.queueTable.clear(); + } final long leakedMemory = taskMemoryManager.cleanUpAllAllocatedMemory(); if (leakedMemory != 0) { fail("Test leaked " + leakedMemory + " bytes of managed memory"); From daf4ae72fe01b6d9631bfbd061b3846bdf668dfa Mon Sep 17 00:00:00 2001 From: Kan Zhang <kzhang@apache.org> Date: Thu, 14 May 2015 23:50:50 -0700 Subject: [PATCH 183/320] [CORE] Remove unreachable Heartbeat message from Worker It doesn't look to me Heartbeat is sent to Worker from anyone. Author: Kan Zhang <kzhang@apache.org> Closes #6163 from kanzhang/deadwood and squashes the following commits: 56be118 [Kan Zhang] [core] Remove unreachable Heartbeat message from Worker --- .../src/main/scala/org/apache/spark/deploy/worker/Worker.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 8f3cc54051048..c8df024dda355 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -324,9 +324,6 @@ private[worker] class Worker( map(e => new ExecutorDescription(e.appId, e.execId, e.cores, e.state)) sender ! WorkerSchedulerStateResponse(workerId, execs.toList, drivers.keys.toSeq) - case Heartbeat => - logInfo(s"Received heartbeat from driver ${sender.path}") - case RegisterWorkerFailed(message) => if (!registered) { logError("Worker registration failed: " + message) From cf842d42a70398671c4bc5ebfa70f6fdb8c57c7f Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Thu, 14 May 2015 23:51:41 -0700 Subject: [PATCH 184/320] [SPARK-7650] [STREAMING] [WEBUI] Move streaming css and js files to the streaming project cc tdas Author: zsxwing <zsxwing@gmail.com> Closes #6160 from zsxwing/SPARK-7650 and squashes the following commits: fe6ae15 [zsxwing] Fix the import order a4ffd99 [zsxwing] Merge branch 'master' into SPARK-7650 dc402b6 [zsxwing] Move streaming css and js files to the streaming project --- core/src/main/scala/org/apache/spark/ui/WebUI.scala | 2 +- .../spark/streaming}/ui/static/streaming-page.css | 0 .../spark/streaming}/ui/static/streaming-page.js | 0 .../apache/spark/streaming/ui/StreamingPage.scala | 4 ++-- .../org/apache/spark/streaming/ui/StreamingTab.scala | 12 +++++++++++- 5 files changed, 14 insertions(+), 4 deletions(-) rename {core/src/main/resources/org/apache/spark => streaming/src/main/resources/org/apache/spark/streaming}/ui/static/streaming-page.css (100%) rename {core/src/main/resources/org/apache/spark => streaming/src/main/resources/org/apache/spark/streaming}/ui/static/streaming-page.js (100%) diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala b/core/src/main/scala/org/apache/spark/ui/WebUI.scala index 384f2ad26e281..1df9cd0fa18b4 100644 --- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/WebUI.scala @@ -94,7 +94,7 @@ private[spark] abstract class WebUI( } /** Detach a handler from this UI. 
*/ - protected def detachHandler(handler: ServletContextHandler) { + def detachHandler(handler: ServletContextHandler) { handlers -= handler serverInfo.foreach { info => info.rootHandler.removeHandler(handler) diff --git a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.css b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css similarity index 100% rename from core/src/main/resources/org/apache/spark/ui/static/streaming-page.css rename to streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css diff --git a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js similarity index 100% rename from core/src/main/resources/org/apache/spark/ui/static/streaming-page.js rename to streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala index 070564aa10633..4ee7a486e370b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala @@ -166,8 +166,8 @@ private[ui] class StreamingPage(parent: StreamingTab) private def generateLoadResources(): Seq[Node] = { // scalastyle:off <script src={SparkUIUtils.prependBaseUri("/static/d3.min.js")}></script> - <link rel="stylesheet" href={SparkUIUtils.prependBaseUri("/static/streaming-page.css")} type="text/css"/> - <script src={SparkUIUtils.prependBaseUri("/static/streaming-page.js")}></script> + <link rel="stylesheet" href={SparkUIUtils.prependBaseUri("/static/streaming/streaming-page.css")} type="text/css"/> + <script src={SparkUIUtils.prependBaseUri("/static/streaming/streaming-page.js")}></script> // scalastyle:on } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala index f307b54bb9630..e0c0f57212f55 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala @@ -17,9 +17,11 @@ package org.apache.spark.streaming.ui +import org.eclipse.jetty.servlet.ServletContextHandler + import org.apache.spark.{Logging, SparkException} import org.apache.spark.streaming.StreamingContext -import org.apache.spark.ui.{SparkUI, SparkUITab} +import org.apache.spark.ui.{JettyUtils, SparkUI, SparkUITab} import StreamingTab._ @@ -30,6 +32,8 @@ import StreamingTab._ private[spark] class StreamingTab(val ssc: StreamingContext) extends SparkUITab(getSparkUI(ssc), "streaming") with Logging { + private val STATIC_RESOURCE_DIR = "org/apache/spark/streaming/ui/static" + val parent = getSparkUI(ssc) val listener = ssc.progressListener @@ -38,12 +42,18 @@ private[spark] class StreamingTab(val ssc: StreamingContext) attachPage(new StreamingPage(this)) attachPage(new BatchPage(this)) + var staticHandler: ServletContextHandler = null + def attach() { getSparkUI(ssc).attachTab(this) + staticHandler = JettyUtils.createStaticHandler(STATIC_RESOURCE_DIR, "/static/streaming") + getSparkUI(ssc).attachHandler(staticHandler) } def detach() { getSparkUI(ssc).detachTab(this) + getSparkUI(ssc).detachHandler(staticHandler) + staticHandler = null } } From 94761485b207fa1f12a8410a68920300d851bf61 Mon Sep 17 00:00:00 2001 
From: Yanbo Liang <ybliang8@gmail.com> Date: Fri, 15 May 2015 00:18:39 -0700 Subject: [PATCH 185/320] [SPARK-6258] [MLLIB] GaussianMixture Python API parity check Implement Python API for major disparities of GaussianMixture cluster algorithm between Scala & Python ```scala GaussianMixture setInitialModel GaussianMixtureModel k ``` Author: Yanbo Liang <ybliang8@gmail.com> Closes #6087 from yanboliang/spark-6258 and squashes the following commits: b3af21c [Yanbo Liang] fix typo 2b645c1 [Yanbo Liang] fix doc 638b4b7 [Yanbo Liang] address comments b5bcade [Yanbo Liang] GaussianMixture Python API parity check --- .../mllib/api/python/PythonMLLibAPI.scala | 24 +++++-- .../clustering/GaussianMixtureModel.scala | 9 ++- python/pyspark/mllib/clustering.py | 67 +++++++++++++++---- 3 files changed, 75 insertions(+), 25 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index f4c477596557f..2fa54df6fc2b2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -345,28 +345,40 @@ private[python] class PythonMLLibAPI extends Serializable { * Returns a list containing weights, mean and covariance of each mixture component. */ def trainGaussianMixture( - data: JavaRDD[Vector], - k: Int, - convergenceTol: Double, + data: JavaRDD[Vector], + k: Int, + convergenceTol: Double, maxIterations: Int, - seed: java.lang.Long): JList[Object] = { + seed: java.lang.Long, + initialModelWeights: java.util.ArrayList[Double], + initialModelMu: java.util.ArrayList[Vector], + initialModelSigma: java.util.ArrayList[Matrix]): JList[Object] = { val gmmAlg = new GaussianMixture() .setK(k) .setConvergenceTol(convergenceTol) .setMaxIterations(maxIterations) + if (initialModelWeights != null && initialModelMu != null && initialModelSigma != null) { + val gaussians = initialModelMu.asScala.toSeq.zip(initialModelSigma.asScala.toSeq).map { + case (x, y) => new MultivariateGaussian(x.asInstanceOf[Vector], y.asInstanceOf[Matrix]) + } + val initialModel = new GaussianMixtureModel( + initialModelWeights.asScala.toArray, gaussians.toArray) + gmmAlg.setInitialModel(initialModel) + } + if (seed != null) gmmAlg.setSeed(seed) try { val model = gmmAlg.run(data.rdd.persist(StorageLevel.MEMORY_AND_DISK)) var wt = ArrayBuffer.empty[Double] - var mu = ArrayBuffer.empty[Vector] + var mu = ArrayBuffer.empty[Vector] var sigma = ArrayBuffer.empty[Matrix] for (i <- 0 until model.k) { wt += model.weights(i) mu += model.gaussians(i).mu sigma += model.gaussians(i).sigma - } + } List(Vectors.dense(wt.toArray), mu.toArray, sigma.toArray).map(_.asInstanceOf[Object]).asJava } finally { data.rdd.unpersist(blocking = false) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala index ec65a3da689de..c22862c130e77 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala @@ -38,11 +38,10 @@ import org.apache.spark.sql.{SQLContext, Row} * are drawn from each Gaussian i=1..k with probability w(i); mu(i) and sigma(i) are * the respective mean and covariance for each Gaussian distribution i=1..k. 
* - * @param weight Weights for each Gaussian distribution in the mixture, where weight(i) is - * the weight for Gaussian i, and weight.sum == 1 - * @param mu Means for each Gaussian in the mixture, where mu(i) is the mean for Gaussian i - * @param sigma Covariance maxtrix for each Gaussian in the mixture, where sigma(i) is the - * covariance matrix for Gaussian i + * @param weights Weights for each Gaussian distribution in the mixture, where weights(i) is + * the weight for Gaussian i, and weights.sum == 1 + * @param gaussians Array of MultivariateGaussian where gaussians(i) represents + * the Multivariate Gaussian (Normal) Distribution for Gaussian i */ @Experimental class GaussianMixtureModel( diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index 04e67158514f5..a53333dae6a82 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -142,6 +142,7 @@ class GaussianMixtureModel(object): """A clustering model derived from the Gaussian Mixture Model method. + >>> from pyspark.mllib.linalg import Vectors, DenseMatrix >>> clusterdata_1 = sc.parallelize(array([-0.1,-0.05,-0.01,-0.1, ... 0.9,0.8,0.75,0.935, ... -0.83,-0.68,-0.91,-0.76 ]).reshape(6, 2)) @@ -154,11 +155,12 @@ class GaussianMixtureModel(object): True >>> labels[4]==labels[5] True - >>> clusterdata_2 = sc.parallelize(array([-5.1971, -2.5359, -3.8220, - ... -5.2211, -5.0602, 4.7118, - ... 6.8989, 3.4592, 4.6322, - ... 5.7048, 4.6567, 5.5026, - ... 4.5605, 5.2043, 6.2734]).reshape(5, 3)) + >>> data = array([-5.1971, -2.5359, -3.8220, + ... -5.2211, -5.0602, 4.7118, + ... 6.8989, 3.4592, 4.6322, + ... 5.7048, 4.6567, 5.5026, + ... 4.5605, 5.2043, 6.2734]) + >>> clusterdata_2 = sc.parallelize(data.reshape(5,3)) >>> model = GaussianMixture.train(clusterdata_2, 2, convergenceTol=0.0001, ... maxIterations=150, seed=10) >>> labels = model.predict(clusterdata_2).collect() @@ -166,12 +168,38 @@ class GaussianMixtureModel(object): True >>> labels[3]==labels[4] True + >>> clusterdata_3 = sc.parallelize(data.reshape(15, 1)) + >>> im = GaussianMixtureModel([0.5, 0.5], + ... [MultivariateGaussian(Vectors.dense([-1.0]), DenseMatrix(1, 1, [1.0])), + ... MultivariateGaussian(Vectors.dense([1.0]), DenseMatrix(1, 1, [1.0]))]) + >>> model = GaussianMixture.train(clusterdata_3, 2, initialModel=im) """ def __init__(self, weights, gaussians): - self.weights = weights - self.gaussians = gaussians - self.k = len(self.weights) + self._weights = weights + self._gaussians = gaussians + self._k = len(self._weights) + + @property + def weights(self): + """ + Weights for each Gaussian distribution in the mixture, where weights[i] is + the weight for Gaussian i, and weights.sum == 1. + """ + return self._weights + + @property + def gaussians(self): + """ + Array of MultivariateGaussian where gaussians[i] represents + the Multivariate Gaussian (Normal) Distribution for Gaussian i. + """ + return self._gaussians + + @property + def k(self): + """Number of gaussians in mixture.""" + return self._k def predict(self, x): """ @@ -193,9 +221,9 @@ def predictSoft(self, x): :return: membership_matrix. RDD of array of double values. 
""" if isinstance(x, RDD): - means, sigmas = zip(*[(g.mu, g.sigma) for g in self.gaussians]) + means, sigmas = zip(*[(g.mu, g.sigma) for g in self._gaussians]) membership_matrix = callMLlibFunc("predictSoftGMM", x.map(_convert_to_vector), - _convert_to_vector(self.weights), means, sigmas) + _convert_to_vector(self._weights), means, sigmas) return membership_matrix.map(lambda x: pyarray.array('d', x)) @@ -208,13 +236,24 @@ class GaussianMixture(object): :param convergenceTol: Threshold value to check the convergence criteria. Defaults to 1e-3 :param maxIterations: Number of iterations. Default to 100 :param seed: Random Seed + :param initialModel: GaussianMixtureModel for initializing learning """ @classmethod - def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None): + def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initialModel=None): """Train a Gaussian Mixture clustering model.""" - weight, mu, sigma = callMLlibFunc("trainGaussianMixture", - rdd.map(_convert_to_vector), k, - convergenceTol, maxIterations, seed) + initialModelWeights = None + initialModelMu = None + initialModelSigma = None + if initialModel is not None: + if initialModel.k != k: + raise Exception("Mismatched cluster count, initialModel.k = %s, however k = %s" + % (initialModel.k, k)) + initialModelWeights = initialModel.weights + initialModelMu = [initialModel.gaussians[i].mu for i in range(initialModel.k)] + initialModelSigma = [initialModel.gaussians[i].sigma for i in range(initialModel.k)] + weight, mu, sigma = callMLlibFunc("trainGaussianMixture", rdd.map(_convert_to_vector), k, + convergenceTol, maxIterations, seed, initialModelWeights, + initialModelMu, initialModelSigma) mvg_obj = [MultivariateGaussian(mu[i], sigma[i]) for i in range(k)] return GaussianMixtureModel(weight, mvg_obj) From fdf5bba35d201fe0de3901b4d47262c485c76569 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Fri, 15 May 2015 16:20:49 +0800 Subject: [PATCH 186/320] [SPARK-7591] [SQL] Partitioning support API tweaks Please see [SPARK-7591] [1] for the details. 
/cc rxin marmbrus yhuai [1]: https://issues.apache.org/jira/browse/SPARK-7591 Author: Cheng Lian <lian@databricks.com> Closes #6150 from liancheng/spark-7591 and squashes the following commits: af422e7 [Cheng Lian] Addresses @rxin's comments 37d1738 [Cheng Lian] Fixes HadoopFsRelation partition columns initialization 2fc680a [Cheng Lian] Fixes Scala style issue 189ad23 [Cheng Lian] Removes HadoopFsRelation constructor arguments 522c24e [Cheng Lian] Adds OutputWriterFactory 047d40d [Cheng Lian] Renames FSBased* to HadoopFs*, also renamed FSBasedParquetRelation back to ParquetRelation2 --- .../org/apache/spark/sql/SQLContext.scala | 14 +- ...{fsBasedParquet.scala => newParquet.scala} | 71 ++++----- .../sql/sources/DataSourceStrategy.scala | 10 +- .../spark/sql/sources/PartitioningUtils.scala | 4 + .../apache/spark/sql/sources/commands.scala | 23 ++- .../org/apache/spark/sql/sources/ddl.scala | 8 +- .../apache/spark/sql/sources/interfaces.scala | 140 +++++++++--------- .../org/apache/spark/sql/sources/rules.scala | 2 +- .../sql/parquet/ParquetFilterSuite.scala | 2 +- .../sql/parquet/ParquetSchemaSuite.scala | 12 +- .../spark/sql/hive/HiveMetastoreCatalog.scala | 12 +- .../spark/sql/hive/execution/commands.scala | 2 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 6 +- .../sql/hive/execution/SQLQuerySuite.scala | 8 +- .../apache/spark/sql/hive/parquetSuites.scala | 20 +-- .../sql/sources/SimpleTextRelation.scala | 47 +++--- ...tes.scala => hadoopFsRelationSuites.scala} | 8 +- 17 files changed, 195 insertions(+), 194 deletions(-) rename sql/core/src/main/scala/org/apache/spark/sql/parquet/{fsBasedParquet.scala => newParquet.scala} (92%) rename sql/hive/src/test/scala/org/apache/spark/sql/sources/{fsBasedRelationSuites.scala => hadoopFsRelationSuites.scala} (98%) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index b33a700208014..9fb355eb81939 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.catalyst.ParserDialect import org.apache.spark.sql.execution.{Filter, _} import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} import org.apache.spark.sql.json._ -import org.apache.spark.sql.parquet.FSBasedParquetRelation +import org.apache.spark.sql.parquet.ParquetRelation2 import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -610,7 +610,7 @@ class SQLContext(@transient val sparkContext: SparkContext) } else if (conf.parquetUseDataSourceApi) { val globbedPaths = paths.map(new Path(_)).flatMap(SparkHadoopUtil.get.globPath).toArray baseRelationToDataFrame( - new FSBasedParquetRelation( + new ParquetRelation2( globbedPaths.map(_.toString), None, None, Map.empty[String, String])(this)) } else { DataFrame(this, parquet.ParquetRelation( @@ -989,7 +989,7 @@ class SQLContext(@transient val sparkContext: SparkContext) def jdbc(url: String, table: String): DataFrame = { jdbc(url, table, JDBCRelation.columnPartition(null), new Properties()) } - + /** * :: Experimental :: * Construct a [[DataFrame]] representing the database table accessible via JDBC URL @@ -1002,7 +1002,7 @@ class SQLContext(@transient val sparkContext: SparkContext) def jdbc(url: String, table: String, properties: Properties): DataFrame = { jdbc(url, table, JDBCRelation.columnPartition(null), properties) } - + /** * :: Experimental 
:: * Construct a [[DataFrame]] representing the database table accessible via JDBC URL @@ -1020,7 +1020,7 @@ class SQLContext(@transient val sparkContext: SparkContext) @Experimental def jdbc( url: String, - table: String, + table: String, columnName: String, lowerBound: Long, upperBound: Long, @@ -1056,7 +1056,7 @@ class SQLContext(@transient val sparkContext: SparkContext) val parts = JDBCRelation.columnPartition(partitioning) jdbc(url, table, parts, properties) } - + /** * :: Experimental :: * Construct a [[DataFrame]] representing the database table accessible via JDBC URL @@ -1093,7 +1093,7 @@ class SQLContext(@transient val sparkContext: SparkContext) } jdbc(url, table, parts, properties) } - + private def jdbc( url: String, table: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala similarity index 92% rename from sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala rename to sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index c83a9c35dbddf..946062f6ea64e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/fsBasedParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -41,27 +41,23 @@ import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.{Row, SQLConf, SQLContext} import org.apache.spark.{Logging, Partition => SparkPartition, SparkException} -private[sql] class DefaultSource extends FSBasedRelationProvider { +private[sql] class DefaultSource extends HadoopFsRelationProvider { override def createRelation( sqlContext: SQLContext, paths: Array[String], schema: Option[StructType], partitionColumns: Option[StructType], - parameters: Map[String, String]): FSBasedRelation = { + parameters: Map[String, String]): HadoopFsRelation = { val partitionSpec = partitionColumns.map(PartitionSpec(_, Seq.empty)) - new FSBasedParquetRelation(paths, schema, partitionSpec, parameters)(sqlContext) + new ParquetRelation2(paths, schema, partitionSpec, parameters)(sqlContext) } } // NOTE: This class is instantiated and used on executor side only, no need to be serializable. 
-private[sql] class ParquetOutputWriter extends OutputWriter { - private var recordWriter: RecordWriter[Void, Row] = _ - private var taskAttemptContext: TaskAttemptContext = _ - - override def init( - path: String, - dataSchema: StructType, - context: TaskAttemptContext): Unit = { +private[sql] class ParquetOutputWriter(path: String, context: TaskAttemptContext) + extends OutputWriter { + + private val recordWriter: RecordWriter[Void, Row] = { val conf = context.getConfiguration val outputFormat = { // When appending new Parquet files to an existing Parquet file directory, to avoid @@ -77,7 +73,7 @@ private[sql] class ParquetOutputWriter extends OutputWriter { if (fs.exists(outputPath)) { // Pattern used to match task ID in part file names, e.g.: // - // part-r-00001.gz.part + // part-r-00001.gz.parquet // ^~~~~ val partFilePattern = """part-.-(\d{1,}).*""".r @@ -86,9 +82,8 @@ private[sql] class ParquetOutputWriter extends OutputWriter { case name if name.startsWith("_") => 0 case name if name.startsWith(".") => 0 case name => sys.error( - s"""Trying to write Parquet files to directory $outputPath, - |but found items with illegal name "$name" - """.stripMargin.replace('\n', ' ').trim) + s"Trying to write Parquet files to directory $outputPath, " + + s"but found items with illegal name '$name'.") }.reduceOption(_ max _).getOrElse(0) } else { 0 @@ -111,37 +106,39 @@ private[sql] class ParquetOutputWriter extends OutputWriter { } } - recordWriter = outputFormat.getRecordWriter(context) - taskAttemptContext = context + outputFormat.getRecordWriter(context) } override def write(row: Row): Unit = recordWriter.write(null, row) - override def close(): Unit = recordWriter.close(taskAttemptContext) + override def close(): Unit = recordWriter.close(context) } -private[sql] class FSBasedParquetRelation( - paths: Array[String], +private[sql] class ParquetRelation2( + override val paths: Array[String], private val maybeDataSchema: Option[StructType], private val maybePartitionSpec: Option[PartitionSpec], parameters: Map[String, String])( val sqlContext: SQLContext) - extends FSBasedRelation(paths, maybePartitionSpec) + extends HadoopFsRelation(maybePartitionSpec) with Logging { // Should we merge schemas from all Parquet part-files? private val shouldMergeSchemas = - parameters.getOrElse(FSBasedParquetRelation.MERGE_SCHEMA, "true").toBoolean + parameters.getOrElse(ParquetRelation2.MERGE_SCHEMA, "true").toBoolean private val maybeMetastoreSchema = parameters - .get(FSBasedParquetRelation.METASTORE_SCHEMA) + .get(ParquetRelation2.METASTORE_SCHEMA) .map(DataType.fromJson(_).asInstanceOf[StructType]) - private val metadataCache = new MetadataCache - metadataCache.refresh() + private lazy val metadataCache: MetadataCache = { + val meta = new MetadataCache + meta.refresh() + meta + } override def equals(other: scala.Any): Boolean = other match { - case that: FSBasedParquetRelation => + case that: ParquetRelation2 => val schemaEquality = if (shouldMergeSchemas) { this.shouldMergeSchemas == that.shouldMergeSchemas } else { @@ -175,8 +172,6 @@ private[sql] class FSBasedParquetRelation( } } - override def outputWriterClass: Class[_ <: OutputWriter] = classOf[ParquetOutputWriter] - override def dataSchema: StructType = metadataCache.dataSchema override private[sql] def refresh(): Unit = { @@ -187,9 +182,12 @@ private[sql] class FSBasedParquetRelation( // Parquet data source always uses Catalyst internal representations. 
override val needConversion: Boolean = false - override val sizeInBytes = metadataCache.dataStatuses.map(_.getLen).sum + override def sizeInBytes: Long = metadataCache.dataStatuses.map(_.getLen).sum + + override def userDefinedPartitionColumns: Option[StructType] = + maybePartitionSpec.map(_.partitionColumns) - override def prepareForWrite(job: Job): Unit = { + override def prepareJobForWrite(job: Job): OutputWriterFactory = { val conf = ContextUtil.getConfiguration(job) val committerClass = @@ -224,6 +222,13 @@ private[sql] class FSBasedParquetRelation( .getOrElse( sqlContext.conf.parquetCompressionCodec.toUpperCase, CompressionCodecName.UNCOMPRESSED).name()) + + new OutputWriterFactory { + override def newInstance( + path: String, dataSchema: StructType, context: TaskAttemptContext): OutputWriter = { + new ParquetOutputWriter(path, context) + } + } } override def buildScan( @@ -385,7 +390,7 @@ private[sql] class FSBasedParquetRelation( // case insensitivity issue and possible schema mismatch (probably caused by schema // evolution). maybeMetastoreSchema - .map(FSBasedParquetRelation.mergeMetastoreParquetSchema(_, dataSchema0)) + .map(ParquetRelation2.mergeMetastoreParquetSchema(_, dataSchema0)) .getOrElse(dataSchema0) } } @@ -439,12 +444,12 @@ private[sql] class FSBasedParquetRelation( "No schema defined, " + s"and no Parquet data file or summary file found under ${paths.mkString(", ")}.") - FSBasedParquetRelation.readSchema(filesToTouch.map(footers.apply), sqlContext) + ParquetRelation2.readSchema(filesToTouch.map(footers.apply), sqlContext) } } } -private[sql] object FSBasedParquetRelation extends Logging { +private[sql] object ParquetRelation2 extends Logging { // Whether we should merge schemas collected from all Parquet part-files. private[sql] val MERGE_SCHEMA = "mergeSchema" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala index ee099ab9593c7..e6324b20b3065 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala @@ -59,7 +59,7 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { (a, _) => t.buildScan(a)) :: Nil // Scanning partitioned FSBasedRelation - case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: FSBasedRelation)) + case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: HadoopFsRelation)) if t.partitionSpec.partitionColumns.nonEmpty => val selectedPartitions = prunePartitions(filters, t.partitionSpec).toArray @@ -87,7 +87,7 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { selectedPartitions) :: Nil // Scanning non-partitioned FSBasedRelation - case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: FSBasedRelation)) => + case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: HadoopFsRelation)) => val inputPaths = t.paths.map(new Path(_)).flatMap { path => val fs = path.getFileSystem(t.sqlContext.sparkContext.hadoopConfiguration) val qualifiedPath = path.makeQualified(fs.getUri, fs.getWorkingDirectory) @@ -111,10 +111,10 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { execution.ExecutedCommand(InsertIntoDataSource(l, query, overwrite)) :: Nil case i @ logical.InsertIntoTable( - l @ LogicalRelation(t: FSBasedRelation), part, query, overwrite, false) if part.isEmpty => + l @ LogicalRelation(t: HadoopFsRelation), 
part, query, overwrite, false) if part.isEmpty => val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append execution.ExecutedCommand( - InsertIntoFSBasedRelation(t, query, Array.empty[String], mode)) :: Nil + InsertIntoHadoopFsRelation(t, query, Array.empty[String], mode)) :: Nil case _ => Nil } @@ -126,7 +126,7 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { partitionColumns: StructType, partitions: Array[Partition]) = { val output = projections.map(_.toAttribute) - val relation = logicalRelation.relation.asInstanceOf[FSBasedRelation] + val relation = logicalRelation.relation.asInstanceOf[HadoopFsRelation] // Builds RDD[Row]s for each selected partition. val perPartitionRows = partitions.map { case Partition(partitionValues, dir) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala index d30f7f65e21c0..d1f0cdab55f66 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala @@ -35,6 +35,10 @@ private[sql] case class Partition(values: Row, path: String) private[sql] case class PartitionSpec(partitionColumns: StructType, partitions: Seq[Partition]) private[sql] object PartitioningUtils { + // This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since sql/core doesn't + // depend on Hive. + private[sql] val DEFAULT_PARTITION_NAME = "__HIVE_DEFAULT_PARTITION__" + private[sql] case class PartitionValues(columnNames: Seq[String], literals: Seq[Literal]) { require(columnNames.size == literals.size) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index 7879328bbaaab..a09bb08de736a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -58,8 +58,8 @@ private[sql] case class InsertIntoDataSource( } } -private[sql] case class InsertIntoFSBasedRelation( - @transient relation: FSBasedRelation, +private[sql] case class InsertIntoHadoopFsRelation( + @transient relation: HadoopFsRelation, @transient query: LogicalPlan, partitionColumns: Array[String], mode: SaveMode) @@ -102,7 +102,7 @@ private[sql] case class InsertIntoFSBasedRelation( insert(new DefaultWriterContainer(relation, job), df) } else { val writerContainer = new DynamicPartitionWriterContainer( - relation, job, partitionColumns, "__HIVE_DEFAULT_PARTITION__") + relation, job, partitionColumns, PartitioningUtils.DEFAULT_PARTITION_NAME) insertWithDynamicPartitions(sqlContext, writerContainer, df, partitionColumns) } } @@ -234,7 +234,7 @@ private[sql] case class InsertIntoFSBasedRelation( } private[sql] abstract class BaseWriterContainer( - @transient val relation: FSBasedRelation, + @transient val relation: HadoopFsRelation, @transient job: Job) extends SparkHadoopMapReduceUtil with Logging @@ -261,7 +261,7 @@ private[sql] abstract class BaseWriterContainer( protected val dataSchema = relation.dataSchema - protected val outputWriterClass: Class[_ <: OutputWriter] = relation.outputWriterClass + protected var outputWriterFactory: OutputWriterFactory = _ private var outputFormatClass: Class[_ <: OutputFormat[_, _]] = _ @@ -269,7 +269,7 @@ private[sql] abstract class BaseWriterContainer( setupIDs(0, 0, 0) setupConf() taskAttemptContext = 
newTaskAttemptContext(serializableConf.value, taskAttemptId) - relation.prepareForWrite(job) + outputWriterFactory = relation.prepareJobForWrite(job) outputFormatClass = job.getOutputFormatClass outputCommitter = newOutputCommitter(taskAttemptContext) outputCommitter.setupJob(jobContext) @@ -346,16 +346,15 @@ private[sql] abstract class BaseWriterContainer( } private[sql] class DefaultWriterContainer( - @transient relation: FSBasedRelation, + @transient relation: HadoopFsRelation, @transient job: Job) extends BaseWriterContainer(relation, job) { @transient private var writer: OutputWriter = _ override protected def initWriters(): Unit = { - writer = outputWriterClass.newInstance() taskAttemptContext.getConfiguration.set("spark.sql.sources.output.path", outputPath) - writer.init(getWorkPath, dataSchema, taskAttemptContext) + writer = outputWriterFactory.newInstance(getWorkPath, dataSchema, taskAttemptContext) } override def outputWriterForRow(row: Row): OutputWriter = writer @@ -372,7 +371,7 @@ private[sql] class DefaultWriterContainer( } private[sql] class DynamicPartitionWriterContainer( - @transient relation: FSBasedRelation, + @transient relation: HadoopFsRelation, @transient job: Job, partitionColumns: Array[String], defaultPartitionName: String) @@ -398,12 +397,10 @@ private[sql] class DynamicPartitionWriterContainer( outputWriters.getOrElseUpdate(partitionPath, { val path = new Path(getWorkPath, partitionPath) - val writer = outputWriterClass.newInstance() taskAttemptContext.getConfiguration.set( "spark.sql.sources.output.path", new Path(outputPath, partitionPath).toString) - writer.init(path.toString, dataSchema, taskAttemptContext) - writer + outputWriterFactory.newInstance(path.toString, dataSchema, taskAttemptContext) }) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index 595c5eb40e295..37a569db311ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -226,7 +226,7 @@ private[sql] object ResolvedDataSource { case Some(schema: StructType) => clazz.newInstance() match { case dataSource: SchemaRelationProvider => dataSource.createRelation(sqlContext, new CaseInsensitiveMap(options), schema) - case dataSource: FSBasedRelationProvider => + case dataSource: HadoopFsRelationProvider => val maybePartitionsSchema = if (partitionColumns.isEmpty) { None } else { @@ -256,7 +256,7 @@ private[sql] object ResolvedDataSource { case None => clazz.newInstance() match { case dataSource: RelationProvider => dataSource.createRelation(sqlContext, new CaseInsensitiveMap(options)) - case dataSource: FSBasedRelationProvider => + case dataSource: HadoopFsRelationProvider => val caseInsensitiveOptions = new CaseInsensitiveMap(options) val paths = { val patternPath = new Path(caseInsensitiveOptions("path")) @@ -296,7 +296,7 @@ private[sql] object ResolvedDataSource { val relation = clazz.newInstance() match { case dataSource: CreatableRelationProvider => dataSource.createRelation(sqlContext, mode, options, data) - case dataSource: FSBasedRelationProvider => + case dataSource: HadoopFsRelationProvider => // Don't glob path for the write path. The contracts here are: // 1. Only one output path can be specified on the write path; // 2. 
Output path must be a legal HDFS style file system path; @@ -315,7 +315,7 @@ private[sql] object ResolvedDataSource { Some(partitionColumnsSchema(data.schema, partitionColumns)), caseInsensitiveOptions) sqlContext.executePlan( - InsertIntoFSBasedRelation( + InsertIntoHadoopFsRelation( r, data.logicalPlan, partitionColumns.toArray, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 6f315305c11d6..274ab4485217a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Row, _} +import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection import org.apache.spark.sql.types.{StructField, StructType} @@ -94,7 +94,7 @@ trait SchemaRelationProvider { * ::DeveloperApi:: * Implemented by objects that produce relations for a specific kind of data source * with a given schema and partitioned columns. When Spark SQL is given a DDL operation with a - * USING clause specified (to specify the implemented [[FSBasedRelationProvider]]), a user defined + * USING clause specified (to specify the implemented [[HadoopFsRelationProvider]]), a user defined * schema, and an optional list of partition columns, this interface is used to pass in the * parameters specified by a user. * @@ -105,15 +105,15 @@ trait SchemaRelationProvider { * * A new instance of this class with be instantiated each time a DDL call is made. * - * The difference between a [[RelationProvider]] and a [[FSBasedRelationProvider]] is + * The difference between a [[RelationProvider]] and a [[HadoopFsRelationProvider]] is * that users need to provide a schema and a (possibly empty) list of partition columns when * using a SchemaRelationProvider. A relation provider can inherits both [[RelationProvider]], - * and [[FSBasedRelationProvider]] if it can support schema inference, user-specified + * and [[HadoopFsRelationProvider]] if it can support schema inference, user-specified * schemas, and accessing partitioned relations. * * @since 1.4.0 */ -trait FSBasedRelationProvider { +trait HadoopFsRelationProvider { /** * Returns a new base relation with the given parameters, a user defined schema, and a list of * partition columns. Note: the parameters' keywords are case insensitive and this insensitivity @@ -124,7 +124,7 @@ trait FSBasedRelationProvider { paths: Array[String], schema: Option[StructType], partitionColumns: Option[StructType], - parameters: Map[String, String]): FSBasedRelation + parameters: Map[String, String]): HadoopFsRelation } /** @@ -280,33 +280,42 @@ trait CatalystScan { /** * ::Experimental:: - * [[OutputWriter]] is used together with [[FSBasedRelation]] for persisting rows to the - * underlying file system. Subclasses of [[OutputWriter]] must provide a zero-argument constructor. - * An [[OutputWriter]] instance is created and initialized when a new output file is opened on - * executor side. This instance is used to persist rows to this single output file. + * A factory that produces [[OutputWriter]]s. 
A new [[OutputWriterFactory]] is created on driver + * side for each write job issued when writing to a [[HadoopFsRelation]], and then gets serialized + * to executor side to create actual [[OutputWriter]]s on the fly. * * @since 1.4.0 */ @Experimental -abstract class OutputWriter { +abstract class OutputWriterFactory extends Serializable { /** - * Initializes this [[OutputWriter]] before any rows are persisted. + * When writing to a [[HadoopFsRelation]], this method gets called by each task on executor side + * to instantiate new [[OutputWriter]]s. * * @param path Path of the file to which this [[OutputWriter]] is supposed to write. Note that * this may not point to the final output file. For example, `FileOutputFormat` writes to * temporary directories and then merge written files back to the final destination. In * this case, `path` points to a temporary output file under the temporary directory. * @param dataSchema Schema of the rows to be written. Partition columns are not included in the - * schema if the corresponding relation is partitioned. + * schema if the relation being written is partitioned. * @param context The Hadoop MapReduce task context. * * @since 1.4.0 */ - def init( - path: String, - dataSchema: StructType, - context: TaskAttemptContext): Unit = () + def newInstance(path: String, dataSchema: StructType, context: TaskAttemptContext): OutputWriter +} +/** + * ::Experimental:: + * [[OutputWriter]] is used together with [[HadoopFsRelation]] for persisting rows to the + * underlying file system. Subclasses of [[OutputWriter]] must provide a zero-argument constructor. + * An [[OutputWriter]] instance is created and initialized when a new output file is opened on + * executor side. This instance is used to persist rows to this single output file. + * + * @since 1.4.0 + */ +@Experimental +abstract class OutputWriter { /** * Persists a single row. Invoked on the executor side. When writing to dynamically partitioned * tables, dynamic partition columns are not included in rows to be written. @@ -333,74 +342,71 @@ abstract class OutputWriter { * filter using selected predicates before producing an RDD containing all matching tuples as * [[Row]] objects. In addition, when reading from Hive style partitioned tables stored in file * systems, it's able to discover partitioning information from the paths of input directories, and - * perform partition pruning before start reading the data. Subclasses of [[FSBasedRelation()]] must - * override one of the three `buildScan` methods to implement the read path. + * perform partition pruning before start reading the data. Subclasses of [[HadoopFsRelation()]] + * must override one of the three `buildScan` methods to implement the read path. * * For the write path, it provides the ability to write to both non-partitioned and partitioned * tables. Directory layout of the partitioned tables is compatible with Hive. * * @constructor This constructor is for internal uses only. The [[PartitionSpec]] argument is for * implementing metastore table conversion. - * @param paths Base paths of this relation. For partitioned relations, it should be the root - * directories of all partition directories. - * @param maybePartitionSpec An [[FSBasedRelation]] can be created with an optional + * + * @param maybePartitionSpec An [[HadoopFsRelation]] can be created with an optional * [[PartitionSpec]], so that partition discovery can be skipped. 
* * @since 1.4.0 */ @Experimental -abstract class FSBasedRelation private[sql]( - val paths: Array[String], - maybePartitionSpec: Option[PartitionSpec]) +abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[PartitionSpec]) extends BaseRelation { + def this() = this(None) + + private val hadoopConf = new Configuration(sqlContext.sparkContext.hadoopConfiguration) + + private val codegenEnabled = sqlContext.conf.codegenEnabled + + private var _partitionSpec: PartitionSpec = _ + + final private[sql] def partitionSpec: PartitionSpec = { + if (_partitionSpec == null) { + _partitionSpec = maybePartitionSpec + .map(spec => spec.copy(partitionColumns = spec.partitionColumns.asNullable)) + .orElse(userDefinedPartitionColumns.map(PartitionSpec(_, Array.empty[Partition]))) + .getOrElse { + if (sqlContext.conf.partitionDiscoveryEnabled()) { + discoverPartitions() + } else { + PartitionSpec(StructType(Nil), Array.empty[Partition]) + } + } + } + _partitionSpec + } + /** - * Constructs an [[FSBasedRelation]]. - * - * @param paths Base paths of this relation. For partitioned relations, it should be either root - * directories of all partition directories. - * @param partitionColumns Partition columns of this relation. + * Base paths of this relation. For partitioned relations, it should be either root directories + * of all partition directories. * * @since 1.4.0 */ - def this(paths: Array[String], partitionColumns: StructType) = - this(paths, { - if (partitionColumns.isEmpty) None - else Some(PartitionSpec(partitionColumns, Array.empty[Partition])) - }) + def paths: Array[String] /** - * Constructs an [[FSBasedRelation]]. - * - * @param paths Base paths of this relation. For partitioned relations, it should be root - * directories of all partition directories. + * Partition columns. Can be either defined by [[userDefinedPartitionColumns]] or automatically + * discovered. Note that they should always be nullable. * * @since 1.4.0 */ - def this(paths: Array[String]) = this(paths, None) - - private val hadoopConf = new Configuration(sqlContext.sparkContext.hadoopConfiguration) - - private val codegenEnabled = sqlContext.conf.codegenEnabled - - private var _partitionSpec: PartitionSpec = maybePartitionSpec.map { spec => - spec.copy(partitionColumns = spec.partitionColumns.asNullable) - }.getOrElse { - if (sqlContext.conf.partitionDiscoveryEnabled()) { - discoverPartitions() - } else { - PartitionSpec(StructType(Nil), Array.empty[Partition]) - } - } - - private[sql] def partitionSpec: PartitionSpec = _partitionSpec + final def partitionColumns: StructType = + userDefinedPartitionColumns.getOrElse(partitionSpec.partitionColumns) /** - * Partition columns. Note that they are always nullable. + * Optional user defined partition columns. 
* * @since 1.4.0 */ - def partitionColumns: StructType = partitionSpec.partitionColumns + def userDefinedPartitionColumns: Option[StructType] = None private[sql] def refresh(): Unit = { if (sqlContext.conf.partitionDiscoveryEnabled()) { @@ -419,7 +425,7 @@ abstract class FSBasedRelation private[sql]( }.map(_.getPath) if (leafDirs.nonEmpty) { - PartitioningUtils.parsePartitions(leafDirs, "__HIVE_DEFAULT_PARTITION__") + PartitioningUtils.parsePartitions(leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME) } else { PartitionSpec(StructType(Array.empty[StructField]), Array.empty[Partition]) } @@ -458,7 +464,7 @@ abstract class FSBasedRelation private[sql]( * @since 1.4.0 */ def buildScan(inputPaths: Array[String]): RDD[Row] = { - throw new RuntimeException( + throw new UnsupportedOperationException( "At least one buildScan() method should be overridden to read the relation.") } @@ -520,8 +526,8 @@ abstract class FSBasedRelation private[sql]( } /** - * Client side preparation for data writing can be put here. For example, user defined output - * committer can be configured here. + * Prepares a write job and returns an [[OutputWriterFactory]]. Client side job preparation can + * be put here. For example, user defined output committer can be configured here. * * Note that the only side effect expected here is mutating `job` via its setters. Especially, * Spark SQL caches [[BaseRelation]] instances for performance, mutating relation internal states @@ -529,13 +535,5 @@ abstract class FSBasedRelation private[sql]( * * @since 1.4.0 */ - def prepareForWrite(job: Job): Unit = () - - /** - * This method is responsible for producing a new [[OutputWriter]] for each newly opened output - * file on the executor side. - * - * @since 1.4.0 - */ - def outputWriterClass: Class[_ <: OutputWriter] + def prepareJobForWrite(job: Job): OutputWriterFactory } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala index aad1d248d0a28..1eacdde7413f1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala @@ -102,7 +102,7 @@ private[sql] case class PreWriteCheck(catalog: Catalog) extends (LogicalPlan => } case logical.InsertIntoTable(LogicalRelation(_: InsertableRelation), _, _, _, _) => // OK - case logical.InsertIntoTable(LogicalRelation(_: FSBasedRelation), _, _, _, _) => // OK + case logical.InsertIntoTable(LogicalRelation(_: HadoopFsRelation), _, _, _, _) => // OK case logical.InsertIntoTable(l: LogicalRelation, _, _, _, _) => // The relation in l is not an InsertableRelation. 
failAnalysis(s"$l does not allow insertion.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala index 3bbc5b05868af..5ad439584716f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala @@ -63,7 +63,7 @@ class ParquetFilterSuiteBase extends QueryTest with ParquetTest { }.flatten.reduceOption(_ && _) val forParquetDataSource = query.queryExecution.optimizedPlan.collect { - case PhysicalOperation(_, filters, LogicalRelation(_: FSBasedParquetRelation)) => filters + case PhysicalOperation(_, filters, LogicalRelation(_: ParquetRelation2)) => filters }.flatten.reduceOption(_ && _) forParquetTableScan.orElse(forParquetDataSource) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index fc90e3edce7fe..c964b6d984557 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -204,7 +204,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { StructField("lowerCase", StringType), StructField("UPPERCase", DoubleType, nullable = false)))) { - FSBasedParquetRelation.mergeMetastoreParquetSchema( + ParquetRelation2.mergeMetastoreParquetSchema( StructType(Seq( StructField("lowercase", StringType), StructField("uppercase", DoubleType, nullable = false))), @@ -219,7 +219,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { StructType(Seq( StructField("UPPERCase", DoubleType, nullable = false)))) { - FSBasedParquetRelation.mergeMetastoreParquetSchema( + ParquetRelation2.mergeMetastoreParquetSchema( StructType(Seq( StructField("uppercase", DoubleType, nullable = false))), @@ -230,7 +230,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { // Metastore schema contains additional non-nullable fields. assert(intercept[Throwable] { - FSBasedParquetRelation.mergeMetastoreParquetSchema( + ParquetRelation2.mergeMetastoreParquetSchema( StructType(Seq( StructField("uppercase", DoubleType, nullable = false), StructField("lowerCase", BinaryType, nullable = false))), @@ -241,7 +241,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { // Conflicting non-nullable field names intercept[Throwable] { - FSBasedParquetRelation.mergeMetastoreParquetSchema( + ParquetRelation2.mergeMetastoreParquetSchema( StructType(Seq(StructField("lower", StringType, nullable = false))), StructType(Seq(StructField("lowerCase", BinaryType)))) } @@ -255,7 +255,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { StructField("firstField", StringType, nullable = true), StructField("secondField", StringType, nullable = true), StructField("thirdfield", StringType, nullable = true)))) { - FSBasedParquetRelation.mergeMetastoreParquetSchema( + ParquetRelation2.mergeMetastoreParquetSchema( StructType(Seq( StructField("firstfield", StringType, nullable = true), StructField("secondfield", StringType, nullable = true), @@ -268,7 +268,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { // Merge should fail if the Metastore contains any additional fields that are not // nullable. 
assert(intercept[Throwable] { - FSBasedParquetRelation.mergeMetastoreParquetSchema( + ParquetRelation2.mergeMetastoreParquetSchema( StructType(Seq( StructField("firstfield", StringType, nullable = true), StructField("secondfield", StringType, nullable = true), diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index b0e82c8d033b2..2aa80b47a97e2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.hive.client._ -import org.apache.spark.sql.parquet.FSBasedParquetRelation +import org.apache.spark.sql.parquet.ParquetRelation2 import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, Partition => ParquetPartition, PartitionSpec, ResolvedDataSource} import org.apache.spark.sql.types._ import org.apache.spark.sql.{AnalysisException, SQLContext, SaveMode, sources} @@ -226,8 +226,8 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive // serialize the Metastore schema to JSON and pass it as a data source option because of the // evil case insensitivity issue, which is reconciled within `ParquetRelation2`. val parquetOptions = Map( - FSBasedParquetRelation.METASTORE_SCHEMA -> metastoreSchema.json, - FSBasedParquetRelation.MERGE_SCHEMA -> mergeSchema.toString) + ParquetRelation2.METASTORE_SCHEMA -> metastoreSchema.json, + ParquetRelation2.MERGE_SCHEMA -> mergeSchema.toString) val tableIdentifier = QualifiedTableName(metastoreRelation.databaseName, metastoreRelation.tableName) @@ -238,7 +238,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive partitionSpecInMetastore: Option[PartitionSpec]): Option[LogicalRelation] = { cachedDataSourceTables.getIfPresent(tableIdentifier) match { case null => None // Cache miss - case logical@LogicalRelation(parquetRelation: FSBasedParquetRelation) => + case logical@LogicalRelation(parquetRelation: ParquetRelation2) => // If we have the same paths, same schema, and same partition spec, // we will use the cached Parquet Relation. 
val useCached = @@ -281,7 +281,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val cached = getCached(tableIdentifier, paths, metastoreSchema, Some(partitionSpec)) val parquetRelation = cached.getOrElse { val created = LogicalRelation( - new FSBasedParquetRelation( + new ParquetRelation2( paths.toArray, None, Some(partitionSpec), parquetOptions)(hive)) cachedDataSourceTables.put(tableIdentifier, created) created @@ -294,7 +294,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val cached = getCached(tableIdentifier, paths, metastoreSchema, None) val parquetRelation = cached.getOrElse { val created = LogicalRelation( - new FSBasedParquetRelation(paths.toArray, None, None, parquetOptions)(hive)) + new ParquetRelation2(paths.toArray, None, None, parquetOptions)(hive)) cachedDataSourceTables.put(tableIdentifier, created) created } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index 8e405e080489f..6609763343752 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -194,7 +194,7 @@ case class CreateMetastoreDataSourceAsSelect( sqlContext, Some(query.schema.asNullable), partitionColumns, provider, optionsWithPath) val createdRelation = LogicalRelation(resolved.relation) EliminateSubQueries(sqlContext.table(tableName).logicalPlan) match { - case l @ LogicalRelation(_: InsertableRelation | _: FSBasedRelation) => + case l @ LogicalRelation(_: InsertableRelation | _: HadoopFsRelation) => if (l.relation != createdRelation.relation) { val errorDescription = s"Cannot append to table $tableName because the resolved relation does not " + diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index da5d203d9d343..1bf1c1be3e3d3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.hive.client.{HiveTable, ManagedTable} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ -import org.apache.spark.sql.parquet.FSBasedParquetRelation +import org.apache.spark.sql.parquet.ParquetRelation2 import org.apache.spark.sql.sources.LogicalRelation import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -579,11 +579,11 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { ) table("test_parquet_ctas").queryExecution.optimizedPlan match { - case LogicalRelation(p: FSBasedParquetRelation) => // OK + case LogicalRelation(p: ParquetRelation2) => // OK case _ => fail( "test_parquet_ctas should be converted to " + - s"${classOf[FSBasedParquetRelation].getCanonicalName}") + s"${classOf[ParquetRelation2].getCanonicalName}") } // Clenup and reset confs. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 5c7152e2140db..dfe73c62c42b9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.hive.{HiveQLDialect, HiveShim, MetastoreRelation} -import org.apache.spark.sql.parquet.FSBasedParquetRelation +import org.apache.spark.sql.parquet.ParquetRelation2 import org.apache.spark.sql.sources.LogicalRelation import org.apache.spark.sql.types._ @@ -175,17 +175,17 @@ class SQLQuerySuite extends QueryTest { def checkRelation(tableName: String, isDataSourceParquet: Boolean): Unit = { val relation = EliminateSubQueries(catalog.lookupRelation(Seq(tableName))) relation match { - case LogicalRelation(r: FSBasedParquetRelation) => + case LogicalRelation(r: ParquetRelation2) => if (!isDataSourceParquet) { fail( s"${classOf[MetastoreRelation].getCanonicalName} is expected, but found " + - s"${FSBasedParquetRelation.getClass.getCanonicalName}.") + s"${ParquetRelation2.getClass.getCanonicalName}.") } case r: MetastoreRelation => if (isDataSourceParquet) { fail( - s"${FSBasedParquetRelation.getClass.getCanonicalName} is expected, but found " + + s"${ParquetRelation2.getClass.getCanonicalName} is expected, but found " + s"${classOf[MetastoreRelation].getCanonicalName}.") } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index 41bcbe84b0ef2..b6be09e2f8837 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -26,8 +26,8 @@ import org.apache.spark.sql.execution.{ExecutedCommand, PhysicalRDD} import org.apache.spark.sql.hive.execution.HiveTableScan import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ -import org.apache.spark.sql.parquet.{FSBasedParquetRelation, ParquetTableScan} -import org.apache.spark.sql.sources.{InsertIntoDataSource, InsertIntoFSBasedRelation, LogicalRelation} +import org.apache.spark.sql.parquet.{ParquetRelation2, ParquetTableScan} +import org.apache.spark.sql.sources.{InsertIntoDataSource, InsertIntoHadoopFsRelation, LogicalRelation} import org.apache.spark.sql.types._ import org.apache.spark.sql.{QueryTest, SQLConf, SaveMode} import org.apache.spark.util.Utils @@ -291,10 +291,10 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { ) table("test_parquet_ctas").queryExecution.optimizedPlan match { - case LogicalRelation(_: FSBasedParquetRelation) => // OK + case LogicalRelation(_: ParquetRelation2) => // OK case _ => fail( "test_parquet_ctas should be converted to " + - s"${classOf[FSBasedParquetRelation].getCanonicalName}") + s"${classOf[ParquetRelation2].getCanonicalName}") } sql("DROP TABLE IF EXISTS test_parquet_ctas") @@ -315,9 +315,9 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { val df = sql("INSERT INTO TABLE test_insert_parquet SELECT a FROM jt") df.queryExecution.executedPlan match { - case ExecutedCommand(InsertIntoFSBasedRelation(_: FSBasedParquetRelation, _, _, 
_)) => // OK + case ExecutedCommand(InsertIntoHadoopFsRelation(_: ParquetRelation2, _, _, _)) => // OK case o => fail("test_insert_parquet should be converted to a " + - s"${classOf[FSBasedParquetRelation].getCanonicalName} and " + + s"${classOf[ParquetRelation2].getCanonicalName} and " + s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan. " + s"However, found a ${o.toString} ") } @@ -345,9 +345,9 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { val df = sql("INSERT INTO TABLE test_insert_parquet SELECT a FROM jt_array") df.queryExecution.executedPlan match { - case ExecutedCommand(InsertIntoFSBasedRelation(r: FSBasedParquetRelation, _, _, _)) => // OK + case ExecutedCommand(InsertIntoHadoopFsRelation(r: ParquetRelation2, _, _, _)) => // OK case o => fail("test_insert_parquet should be converted to a " + - s"${classOf[FSBasedParquetRelation].getCanonicalName} and " + + s"${classOf[ParquetRelation2].getCanonicalName} and " + s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan." + s"However, found a ${o.toString} ") } @@ -378,7 +378,7 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { assertResult(2) { analyzed.collect { - case r @ LogicalRelation(_: FSBasedParquetRelation) => r + case r @ LogicalRelation(_: ParquetRelation2) => r }.size } @@ -390,7 +390,7 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { // Converted test_parquet should be cached. catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) match { case null => fail("Converted test_parquet should be cached in the cache.") - case logical @ LogicalRelation(parquetRelation: FSBasedParquetRelation) => // OK + case logical @ LogicalRelation(parquetRelation: ParquetRelation2) => // OK case other => fail( "The cached test_parquet should be a Parquet Relation. " + diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala index 8801aba2f64c3..29b21586f9c2a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala @@ -24,7 +24,7 @@ import com.google.common.base.Objects import org.apache.hadoop.fs.Path import org.apache.hadoop.io.{NullWritable, Text} import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat, TextOutputFormat} -import org.apache.hadoop.mapreduce.{RecordWriter, TaskAttemptContext} +import org.apache.hadoop.mapreduce.{Job, RecordWriter, TaskAttemptContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} @@ -32,17 +32,16 @@ import org.apache.spark.sql.types.{DataType, StructField, StructType} import org.apache.spark.sql.{Row, SQLContext} /** - * A simple example [[FSBasedRelationProvider]]. + * A simple example [[HadoopFsRelationProvider]]. 
*/ -class SimpleTextSource extends FSBasedRelationProvider { +class SimpleTextSource extends HadoopFsRelationProvider { override def createRelation( sqlContext: SQLContext, paths: Array[String], schema: Option[StructType], partitionColumns: Option[StructType], - parameters: Map[String, String]): FSBasedRelation = { - val partitionsSchema = partitionColumns.getOrElse(StructType(Array.empty[StructField])) - new SimpleTextRelation(paths, schema, partitionsSchema, parameters)(sqlContext) + parameters: Map[String, String]): HadoopFsRelation = { + new SimpleTextRelation(paths, schema, partitionColumns, parameters)(sqlContext) } } @@ -59,38 +58,30 @@ class AppendingTextOutputFormat(outputFile: Path) extends TextOutputFormat[NullW } } -class SimpleTextOutputWriter extends OutputWriter { - private var recordWriter: RecordWriter[NullWritable, Text] = _ - private var taskAttemptContext: TaskAttemptContext = _ - - override def init( - path: String, - dataSchema: StructType, - context: TaskAttemptContext): Unit = { - recordWriter = new AppendingTextOutputFormat(new Path(path)).getRecordWriter(context) - taskAttemptContext = context - } +class SimpleTextOutputWriter(path: String, context: TaskAttemptContext) extends OutputWriter { + private val recordWriter: RecordWriter[NullWritable, Text] = + new AppendingTextOutputFormat(new Path(path)).getRecordWriter(context) override def write(row: Row): Unit = { val serialized = row.toSeq.map(_.toString).mkString(",") recordWriter.write(null, new Text(serialized)) } - override def close(): Unit = recordWriter.close(taskAttemptContext) + override def close(): Unit = recordWriter.close(context) } /** - * A simple example [[FSBasedRelation]], used for testing purposes. Data are stored as comma + * A simple example [[HadoopFsRelation]], used for testing purposes. Data are stored as comma * separated string lines. When scanning data, schema must be explicitly provided via data source * option `"dataSchema"`. 
*/ class SimpleTextRelation( - paths: Array[String], + override val paths: Array[String], val maybeDataSchema: Option[StructType], - partitionsSchema: StructType, + override val userDefinedPartitionColumns: Option[StructType], parameters: Map[String, String])( @transient val sqlContext: SQLContext) - extends FSBasedRelation(paths, partitionsSchema) { + extends HadoopFsRelation { import sqlContext.sparkContext @@ -110,9 +101,6 @@ class SimpleTextRelation( override def hashCode(): Int = Objects.hashCode(paths, maybeDataSchema, dataSchema) - override def outputWriterClass: Class[_ <: OutputWriter] = - classOf[SimpleTextOutputWriter] - override def buildScan(inputPaths: Array[String]): RDD[Row] = { val fields = dataSchema.map(_.dataType) @@ -122,4 +110,13 @@ class SimpleTextRelation( }: _*) } } + + override def prepareJobForWrite(job: Job): OutputWriterFactory = new OutputWriterFactory { + override def newInstance( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): OutputWriter = { + new SimpleTextOutputWriter(path, context) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/fsBasedRelationSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala similarity index 98% rename from sql/hive/src/test/scala/org/apache/spark/sql/sources/fsBasedRelationSuites.scala rename to sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala index 394833f22907d..cf6afd25ae5a0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/fsBasedRelationSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.types._ // TODO Don't extend ParquetTest // This test suite extends ParquetTest for some convenient utility methods. These methods should be // moved to some more general places, maybe QueryTest. -class FSBasedRelationTest extends QueryTest with ParquetTest { +class HadoopFsRelationTest extends QueryTest with ParquetTest { override val sqlContext: SQLContext = TestHive import sqlContext._ @@ -487,7 +487,7 @@ class FSBasedRelationTest extends QueryTest with ParquetTest { } val actualPaths = df.queryExecution.analyzed.collectFirst { - case LogicalRelation(relation: FSBasedRelation) => + case LogicalRelation(relation: HadoopFsRelation) => relation.paths.toSet }.getOrElse { fail("Expect an FSBasedRelation, but none could be found") @@ -499,7 +499,7 @@ class FSBasedRelationTest extends QueryTest with ParquetTest { } } -class SimpleTextRelationSuite extends FSBasedRelationTest { +class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest { override val dataSourceName: String = classOf[SimpleTextSource].getCanonicalName import sqlContext._ @@ -530,7 +530,7 @@ class SimpleTextRelationSuite extends FSBasedRelationTest { } } -class FSBasedParquetRelationSuite extends FSBasedRelationTest { +class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest { override val dataSourceName: String = classOf[parquet.DefaultSource].getCanonicalName import sqlContext._ From c64ff8036cc6bc7c87743f4c751d7fe91c2e366a Mon Sep 17 00:00:00 2001 From: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Date: Fri, 15 May 2015 11:37:34 +0100 Subject: [PATCH 187/320] [SPARK-7503] [YARN] Resources in .sparkStaging directory can't be cleaned up on error When we run applications on YARN with cluster mode, uploaded resources on .sparkStaging directory can't be cleaned up in case of failure of uploading local resources. 
You can see this issue by running following command. ``` bin/spark-submit --master yarn --deploy-mode cluster --class <someClassName> <non-existing-jar> ``` Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Closes #6026 from sarutak/delete-uploaded-resources-on-error and squashes the following commits: caef9f4 [Kousuke Saruta] Fixed style 882f921 [Kousuke Saruta] Wrapped Client#submitApplication with try/catch blocks in order to delete resources on error 1786ca4 [Kousuke Saruta] Merge branch 'master' of https://github.com/apache/spark into delete-uploaded-resources-on-error f61071b [Kousuke Saruta] Fixed cleanup problem --- .../org/apache/spark/deploy/yarn/Client.scala | 72 ++++++++++++------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index d21a7393478ce..7e023f2d92578 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy.yarn -import java.io.{ByteArrayInputStream, DataInputStream, File, FileOutputStream} +import java.io.{ByteArrayInputStream, DataInputStream, File, FileOutputStream, IOException} import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException} import java.nio.ByteBuffer import java.security.PrivilegedExceptionAction @@ -91,30 +91,52 @@ private[spark] class Client( * available in the alpha API. */ def submitApplication(): ApplicationId = { - // Setup the credentials before doing anything else, so we have don't have issues at any point. - setupCredentials() - yarnClient.init(yarnConf) - yarnClient.start() - - logInfo("Requesting a new application from cluster with %d NodeManagers" - .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers)) - - // Get a new application from our RM - val newApp = yarnClient.createApplication() - val newAppResponse = newApp.getNewApplicationResponse() - val appId = newAppResponse.getApplicationId() - - // Verify whether the cluster has enough resources for our AM - verifyClusterResources(newAppResponse) - - // Set up the appropriate contexts to launch our AM - val containerContext = createContainerLaunchContext(newAppResponse) - val appContext = createApplicationSubmissionContext(newApp, containerContext) - - // Finally, submit and monitor the application - logInfo(s"Submitting application ${appId.getId} to ResourceManager") - yarnClient.submitApplication(appContext) - appId + var appId: ApplicationId = null + try { + // Setup the credentials before doing anything else, + // so we have don't have issues at any point. 
+ setupCredentials() + yarnClient.init(yarnConf) + yarnClient.start() + + logInfo("Requesting a new application from cluster with %d NodeManagers" + .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers)) + + // Get a new application from our RM + val newApp = yarnClient.createApplication() + val newAppResponse = newApp.getNewApplicationResponse() + appId = newAppResponse.getApplicationId() + + // Verify whether the cluster has enough resources for our AM + verifyClusterResources(newAppResponse) + + // Set up the appropriate contexts to launch our AM + val containerContext = createContainerLaunchContext(newAppResponse) + val appContext = createApplicationSubmissionContext(newApp, containerContext) + + // Finally, submit and monitor the application + logInfo(s"Submitting application ${appId.getId} to ResourceManager") + yarnClient.submitApplication(appContext) + appId + } catch { + case e: Throwable => + if (appId != null) { + val appStagingDir = getAppStagingDir(appId) + try { + val preserveFiles = sparkConf.getBoolean("spark.yarn.preserve.staging.files", false) + val stagingDirPath = new Path(appStagingDir) + val fs = FileSystem.get(hadoopConf) + if (!preserveFiles && fs.exists(stagingDirPath)) { + logInfo("Deleting staging directory " + stagingDirPath) + fs.delete(stagingDirPath, true) + } + } catch { + case ioe: IOException => + logWarning("Failed to cleanup staging dir " + appStagingDir, ioe) + } + } + throw e + } } /** From f96b85ab44b82736363764ea39ee62884007f4a3 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Fri, 15 May 2015 10:03:29 -0700 Subject: [PATCH 188/320] [SPARK-7668] [MLLIB] Preserve isTransposed property for Matrix after calling map function JIRA: https://issues.apache.org/jira/browse/SPARK-7668 Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #6188 from viirya/fix_matrix_map and squashes the following commits: 2a7cc97 [Liang-Chi Hsieh] Preserve isTransposed property for Matrix after calling map function. 
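To make the layout issue concrete, here is a small illustrative sketch (not taken from the patch; `map` itself is `private[mllib]`, so the effect is reproduced through the public `DenseMatrix` constructor). A transposed matrix keeps its `values` array in the original orientation, so rebuilding a matrix from those values without the `isTransposed` flag silently reinterprets the backing array:

```
import org.apache.spark.mllib.linalg.DenseMatrix

// A 2x3 matrix stored column-major: columns are (1,2), (3,4), (5,6).
val m = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
val t = m.transpose  // 3x2 view over the same values array, with isTransposed = true

// What the old map(f) effectively did: reuse t.values but drop the flag,
// so the backing array is reinterpreted as column-major for the new shape.
val wrong = new DenseMatrix(t.numRows, t.numCols, t.values)

assert(t.toArray.toSeq == Seq(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))      // correct transpose
assert(wrong.toArray.toSeq == Seq(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))  // scrambled layout
```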
--- .../main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 3fa5e068d16d4..a609674df6b8b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -273,7 +273,8 @@ class DenseMatrix( override def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone()) - private[mllib] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f)) + private[mllib] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f), + isTransposed) private[mllib] def update(f: Double => Double): DenseMatrix = { val len = values.length @@ -535,7 +536,7 @@ class SparseMatrix( } private[mllib] def map(f: Double => Double) = - new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.map(f)) + new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.map(f), isTransposed) private[mllib] def update(f: Double => Double): SparseMatrix = { val len = values.length From 8f4aaba0e4e3350ab152a476d08ff60e9495c6d2 Mon Sep 17 00:00:00 2001 From: FlytxtRnD <meethu.mathew@flytxt.com> Date: Fri, 15 May 2015 10:43:18 -0700 Subject: [PATCH 189/320] [SPARK-7651] [MLLIB] [PYSPARK] GMM predict, predictSoft should raise error on bad input In the Python API for Gaussian Mixture Model, predict() and predictSoft() methods should raise an error when the input argument is not an RDD. Author: FlytxtRnD <meethu.mathew@flytxt.com> Closes #6180 from FlytxtRnD/GmmPredictException and squashes the following commits: 4b6aa11 [FlytxtRnD] Raise error if the input to predict()/predictSoft() is not an RDD --- python/pyspark/mllib/clustering.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index a53333dae6a82..b55583f82223f 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -212,6 +212,9 @@ def predict(self, x): if isinstance(x, RDD): cluster_labels = self.predictSoft(x).map(lambda z: z.index(max(z))) return cluster_labels + else: + raise TypeError("x should be represented by an RDD, " + "but got %s." % type(x)) def predictSoft(self, x): """ @@ -225,6 +228,9 @@ def predictSoft(self, x): membership_matrix = callMLlibFunc("predictSoftGMM", x.map(_convert_to_vector), _convert_to_vector(self._weights), means, sigmas) return membership_matrix.map(lambda x: pyarray.array('d', x)) + else: + raise TypeError("x should be represented by an RDD, " + "but got %s." % type(x)) class GaussianMixture(object): From b1b9d5802e3d185f42711ab043a21c9d1eb4763f Mon Sep 17 00:00:00 2001 From: Oleksii Kostyliev <etander@gmail.com> Date: Fri, 15 May 2015 11:19:56 -0700 Subject: [PATCH 190/320] [SPARK-7233] [CORE] Detect REPL mode once <h3>Description</h3> Detect REPL mode once per JVM lifespan. Previous behavior was to check presence of interpreter mode every time a job was submitted. In the case of execution of multiple short-living jobs this was causing massive mutual blocks between submission threads. For more details please refer to https://issues.apache.org/jira/browse/SPARK-7233. <h3>Notes</h3> * I inverted the return value in case of catching an exception from `true` to `false`. It seems more logical to assume that if the REPL class is not found, we aren't in the interpreter mode. 
* I'd personally would call `classForName` with just a Spark classloader (`org.apache.spark.util.Utils#getSparkClassLoader`) but `org.apache.spark.util.Utils#getContextOrSparkClassLoader` is said to be preferable. * I struggled to come up with a concise, readable and clear unit test. Suggestions are welcome if you feel necessary. Author: Oleksii Kostyliev <etander@gmail.com> Author: Oleksii Kostyliev <okostyliev@thunderhead.com> Closes #5835 from preeze/SPARK-7233 and squashes the following commits: 69bb9e4 [Oleksii Kostyliev] SPARK-7527: fixed explanatory comment to meet style-checker requirements 26dcc24 [Oleksii Kostyliev] SPARK-7527: fixed explanatory comment to meet style-checker requirements c6f9685 [Oleksii Kostyliev] Merge remote-tracking branch 'remotes/upstream/master' into SPARK-7233 b78a983 [Oleksii Kostyliev] SPARK-7527: revert the fix and let it be addressed separately at a later stage b64d441 [Oleksii Kostyliev] SPARK-7233: inline inInterpreter parameter into instantiateClass 86e2606 [Oleksii Kostyliev] SPARK-7233, SPARK-7527: Handle interpreter mode properly. c7ee69c [Oleksii Kostyliev] Merge remote-tracking branch 'upstream/master' into SPARK-7233 d6c07fc [Oleksii Kostyliev] SPARK-7233: properly handle the inverted meaning of isInInterpreter c319039 [Oleksii Kostyliev] SPARK-7233: move inInterpreter to Utils and make it lazy --- .../org/apache/spark/util/ClosureCleaner.scala | 16 +++------------- .../main/scala/org/apache/spark/util/Utils.scala | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index 6fe32e469c732..6f2966bd4fd31 100644 --- a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -239,15 +239,6 @@ private[spark] object ClosureCleaner extends Logging { logDebug(s" + fields accessed by starting closure: " + accessedFields.size) accessedFields.foreach { f => logDebug(" " + f) } - val inInterpreter = { - try { - val interpClass = Class.forName("spark.repl.Main") - interpClass.getMethod("interp").invoke(null) != null - } catch { - case _: ClassNotFoundException => true - } - } - // List of outer (class, object) pairs, ordered from outermost to innermost // Note that all outer objects but the outermost one (first one in this list) must be closures var outerPairs: List[(Class[_], AnyRef)] = (outerClasses zip outerObjects).reverse @@ -274,7 +265,7 @@ private[spark] object ClosureCleaner extends Logging { // required fields from the original object. We need the parent here because the Java // language specification requires the first constructor parameter of any closure to be // its enclosing object. 
- val clone = instantiateClass(cls, parent, inInterpreter) + val clone = instantiateClass(cls, parent) for (fieldName <- accessedFields(cls)) { val field = cls.getDeclaredField(fieldName) field.setAccessible(true) @@ -327,9 +318,8 @@ private[spark] object ClosureCleaner extends Logging { private def instantiateClass( cls: Class[_], - enclosingObject: AnyRef, - inInterpreter: Boolean): AnyRef = { - if (!inInterpreter) { + enclosingObject: AnyRef): AnyRef = { + if (!Utils.isInInterpreter) { // This is a bona fide closure class, whose constructor has no effects // other than to set its fields, so use its constructor val cons = cls.getConstructors()(0) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 48843b4ae57c6..6a7d1fae3320e 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1795,6 +1795,20 @@ private[spark] object Utils extends Logging { } } + lazy val isInInterpreter: Boolean = { + try { + val interpClass = classForName("spark.repl.Main") + interpClass.getMethod("interp").invoke(null) != null + } catch { + // Returning true seems to be a mistake. + // Currently changing it to false causes tests failures in Streaming. + // For a more detailed discussion, please, refer to + // https://github.com/apache/spark/pull/5835#issuecomment-101042271 and subsequent comments. + // Addressing this changed is tracked as https://issues.apache.org/jira/browse/SPARK-7527 + case _: ClassNotFoundException => true + } + } + /** * Return a well-formed URI for the file described by a user input string. * From 270d4b5181b95e3f1f131b1d65dde00a7e5b9d6e Mon Sep 17 00:00:00 2001 From: Tim Ellison <t.p.ellison@gmail.com> Date: Fri, 15 May 2015 11:27:24 -0700 Subject: [PATCH 191/320] [CORE] Protect additional test vars from early GC Fix more places in which some test variables could be collected early by aggressive JVM optimization. Added a couple of comments to note where existing references are sufficient in the same test pattern. 
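A hedged sketch of the pattern (simplified; `newRDD` stands for however the suite creates its RDD, while `CleanerTester`, `runGC` and `sc` are the suite's existing fixtures):

```
// Keep a live use of the local variable after the earlier GC-sensitive section;
// otherwise an aggressive JIT may treat it as dead and let its referent be
// collected before the test intends it to be.
var rdd = newRDD.persist()
rdd.count()

// ... earlier phase that runs GC and must NOT observe the RDD being cleaned ...

rdd.count()  // live use: defeats early collection of `rdd` by the JVM up to here

val postGCTester = new CleanerTester(sc, rddIds = Seq(rdd.id))
rdd = null   // now the RDD is genuinely out of scope
runGC()
postGCTester.assertCleanup()
```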
Author: Tim Ellison <t.p.ellison@gmail.com> Closes #6187 from tellison/DefeatEarlyGC and squashes the following commits: 27329d9 [Tim Ellison] [CORE] Protect additional test vars from early GC --- .../scala/org/apache/spark/ContextCleanerSuite.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index cb30e1f4e63a1..0922a2c3599cc 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -165,6 +165,7 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { } // Test that GC causes RDD cleanup after dereferencing the RDD + // Note rdd is used after previous GC to avoid early collection by the JVM val postGCTester = new CleanerTester(sc, rddIds = Seq(rdd.id)) rdd = null // Make RDD out of scope runGC() @@ -181,9 +182,9 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { intercept[Exception] { preGCTester.assertCleanup()(timeout(1000 millis)) } + rdd.count() // Defeat early collection by the JVM // Test that GC causes shuffle cleanup after dereferencing the RDD - rdd.count() // Defeat any early collection of rdd variable by the JVM val postGCTester = new CleanerTester(sc, shuffleIds = Seq(0)) rdd = null // Make RDD out of scope, so that corresponding shuffle goes out of scope runGC() @@ -201,6 +202,7 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { } // Test that GC causes broadcast cleanup after dereferencing the broadcast variable + // Note broadcast is used after previous GC to avoid early collection by the JVM val postGCTester = new CleanerTester(sc, broadcastIds = Seq(broadcast.id)) broadcast = null // Make broadcast variable out of scope runGC() @@ -226,7 +228,7 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { // the checkpoint is not cleaned by default (without the configuration set) var postGCTester = new CleanerTester(sc, Seq(rddId), Nil, Nil, Nil) - rdd = null // Make RDD out of scope + rdd = null // Make RDD out of scope, ok if collected earlier runGC() postGCTester.assertCleanup() assert(fs.exists(RDDCheckpointData.rddCheckpointDataPath(sc, rddId).get)) @@ -245,6 +247,9 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { // Confirm the checkpoint directory exists assert(fs.exists(RDDCheckpointData.rddCheckpointDataPath(sc, rddId).get)) + // Reference rdd to defeat any early collection by the JVM + rdd.count() + // Test that GC causes checkpoint data cleanup after dereferencing the RDD postGCTester = new CleanerTester(sc, Seq(rddId), Nil, Nil, Seq(rddId)) rdd = null // Make RDD out of scope @@ -352,6 +357,7 @@ class SortShuffleContextCleanerSuite extends ContextCleanerSuiteBase(classOf[Sor intercept[Exception] { preGCTester.assertCleanup()(timeout(1000 millis)) } + rdd.count() // Defeat early collection by the JVM // Test that GC causes shuffle cleanup after dereferencing the RDD val postGCTester = new CleanerTester(sc, shuffleIds = Seq(0)) From 8ab1450d3995b0c3ef64c5991b88c258e17bcb12 Mon Sep 17 00:00:00 2001 From: Sean Owen <sowen@cloudera.com> Date: Fri, 15 May 2015 11:30:19 -0700 Subject: [PATCH 192/320] [SPARK-5412] [DEPLOY] Cannot bind Master to a specific hostname as per the documentation Pass args to start-master.sh through to start-daemon.sh, as other scripts do, so that things like --host have effect on start-master.sh as per docs Author: Sean Owen <sowen@cloudera.com> 
Closes #6185 from srowen/SPARK-5412 and squashes the following commits:

b3ce9da [Sean Owen] Pass args to start-master.sh through to start-daemon.sh, as other scripts do, so that things like --host have effect on start-master.sh as per docs
---
 sbin/start-master.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sbin/start-master.sh b/sbin/start-master.sh
index 17fff58f4f768..a7f5d5702fd80 100755
--- a/sbin/start-master.sh
+++ b/sbin/start-master.sh
@@ -22,6 +22,8 @@
sbin="`dirname "$0"`"
sbin="`cd "$sbin"; pwd`"
+ORIGINAL_ARGS="$@"
+
START_TACHYON=false
while (( "$#" )); do
@@ -53,7 +55,9 @@ if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then
SPARK_MASTER_WEBUI_PORT=8080
fi
-"$sbin"/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT
+"$sbin"/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 \
+  --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT \
+  $ORIGINAL_ARGS
if [ "$START_TACHYON" == "true" ]; then
  "$sbin"/../tachyon/bin/tachyon bootstrap-conf $SPARK_MASTER_IP

From ad92af9dbbd0c4e1224cca26da166382ed4f15b9 Mon Sep 17 00:00:00 2001
From: Kousuke Saruta <sarutak@oss.nttdata.co.jp>
Date: Fri, 15 May 2015 11:54:13 -0700
Subject: [PATCH 193/320] [SPARK-7664] [WEBUI] DAG visualization: Fix incorrect link paths of DAG.

In JobPage, we can jump to a StagePage when we click the corresponding box in the DAG visualization, but the link path is incorrect.

When we click a box as follows ...
![screenshot_from_2015-05-15 19 24 25](https://cloud.githubusercontent.com/assets/4736016/7651528/5f7ef824-fb3c-11e4-9518-8c9ade2dff7a.png)

We jump to the index page.
![screenshot_from_2015-05-15 19 24 45](https://cloud.githubusercontent.com/assets/4736016/7651534/6d666274-fb3c-11e4-971c-c3f2dc2b1da2.png)

Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp>

Closes #6184 from sarutak/fix-link-path-of-dag-viz and squashes the following commits:

faba3ba [Kousuke Saruta] Fix a incorrect link
---
 .../resources/org/apache/spark/ui/static/spark-dag-viz.js | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
index 8138eb0d4f390..ee48fd29a6432 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
@@ -186,8 +186,9 @@ function renderDagVizForJob(svgContainer) {
var stageId = metadata.attr("stage-id");
var containerId = VizConstants.graphPrefix + stageId;
// Link each graph to the corresponding stage page (TODO: handle stage attempts)
-    var stageLink = "/stages/stage/?id=" +
-      stageId.replace(VizConstants.stagePrefix, "") + "&attempt=0&expandDagViz=true";
+    var stageLink = $("#stage-" + stageId.replace(VizConstants.stagePrefix, "") + "-0")
+      .find("a")
+      .attr("href") + "&expandDagViz=true";
var container = svgContainer
.append("a")
.attr("xlink:href", stageLink)

From 8e3822a0794b8b18436bd63d6859d40139a77090 Mon Sep 17 00:00:00 2001
From: ehnalis <zoltan.zvara@gmail.com>
Date: Fri, 15 May 2015 12:14:02 -0700
Subject: [PATCH 194/320] [SPARK-7504] [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode

Added a simple check to SparkContext. Also added two rational null checks at the AM object.
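As an illustrative sketch (not part of the patch), the new check makes a program that builds its own context outside spark-submit fail fast with a clear error instead of a later NullPointerException:

```
import org.apache.spark.{SparkConf, SparkContext}

// spark.yarn.app.id is only set when the YARN ApplicationMaster launches the
// user code, so this now throws SparkException("Detected yarn-cluster mode,
// but isn't running on a cluster. ...") instead of a NullPointerException.
val conf = new SparkConf().setAppName("yarn-cluster-repro").setMaster("yarn-cluster")
val sc = new SparkContext(conf)
```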
Author: ehnalis <zoltan.zvara@gmail.com>

Closes #6083 from ehnalis/cluster and squashes the following commits:

926bd96 [ehnalis] Moved check to SparkContext.
7c89b6e [ehnalis] Remove false line.
ea2a5fe [ehnalis] [SPARK-7504] [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode
4924e01 [ehnalis] [SPARK-7504] [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode
39e4fa3 [ehnalis] SPARK-7504 [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode
9f287c5 [ehnalis] [SPARK-7504] [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode
---
 core/src/main/scala/org/apache/spark/SparkContext.scala | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index b59f562d05ead..af276e7b8d40c 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -371,6 +371,14 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
throw new SparkException("An application name must be set in your configuration")
}
+    // System property spark.yarn.app.id must be set if user code ran by AM on a YARN cluster
+    // yarn-standalone is deprecated, but still supported
+    if ((master == "yarn-cluster" || master == "yarn-standalone") &&
+        !_conf.contains("spark.yarn.app.id")) {
+      throw new SparkException("Detected yarn-cluster mode, but isn't running on a cluster. " +
+        "Deployment to YARN is not supported directly by SparkContext. Please use spark-submit.")
+    }
+
if (_conf.getBoolean("spark.logConf", false)) {
logInfo("Spark configuration:\n" + _conf.toDebugString)
}

From 9b6cf285d0b60848b01b6c7e3421e8ac850a88ab Mon Sep 17 00:00:00 2001
From: Kousuke Saruta <sarutak@oss.nttdata.co.jp>
Date: Fri, 15 May 2015 13:54:09 -0700
Subject: [PATCH 195/320] [SPARK-7296] Add timeline visualization for stages in the UI.

This PR builds on #2342 by adding a timeline view for the Stage page, showing how tasks spend their time.

With this timeline, we can understand the following things about a Stage.

* When/where each task ran
* Total duration of each task
* Proportion of the time each task spends

Also, this timeline view is scrollable and zoomable.
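The view caps how many tasks it renders: only the most recently launched tasks are drawn, controlled by the new `spark.ui.timeline.tasks.maximum` setting, which the StagePage diff below reads with a default of 1000. As an illustrative sketch only, raising the cap from application code might look like:

```
import org.apache.spark.SparkConf

// Illustrative only: allow up to 2000 of the most recent tasks to be drawn
// in the stage timeline (the patch reads this key with a default of 1000).
val conf = new SparkConf()
  .setAppName("timeline-demo")
  .set("spark.ui.timeline.tasks.maximum", "2000")
```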
Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Closes #5843 from sarutak/stage-page-timeline and squashes the following commits: 4ba9604 [Kousuke Saruta] Fixed the order of legends 16bb552 [Kousuke Saruta] Removed border of legend area 2e5d605 [Kousuke Saruta] Modified warning message 16cb2e6 [Kousuke Saruta] Merge branch 'master' of https://github.com/apache/spark into stage-page-timeline 7ae328f [Kousuke Saruta] Modified code style d5f794a [Kousuke Saruta] Fixed performance issues more 64e6642 [Kousuke Saruta] Merge branch 'master' of https://github.com/apache/spark into stage-page-timeline e4a3354 [Kousuke Saruta] minor code style change 878e3b8 [Kousuke Saruta] Fixed a bug that tooltip remains b9d8f1b [Kousuke Saruta] Fixed performance issue ac8842b [Kousuke Saruta] Fixed layout 2319739 [Kousuke Saruta] Modified appearances more 81903ab [Kousuke Saruta] Modified appearances a79dcc3 [Kousuke Saruta] Modified appearance 55a390c [Kousuke Saruta] Ignored scalastyle for a line-comment 29eae3e [Kousuke Saruta] limited to longest 1000 tasks 2a9e376 [Kousuke Saruta] Minor cleanup 385b6d2 [Kousuke Saruta] Added link feature ba1ac3e [Kousuke Saruta] Fixed style 2ae8520 [Kousuke Saruta] Updated bootstrap-tooltip.js from 2.2.2 to 2.3.2 af430f1 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into stage-page-timeline e694b8e [Kousuke Saruta] Added timeline view to StagePage 8f6610c [Kousuke Saruta] Fixed conflict b587cf2 [Kousuke Saruta] initial commit 11fe67d [Kousuke Saruta] Fixed conflict 79ac03d [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into timeline-viewer-feature a91abd3 [Kousuke Saruta] Merge branch 'master' of https://github.com/apache/spark into timeline-viewer-feature ef34a5b [Kousuke Saruta] Implement tooltip using bootstrap b09d0c5 [Kousuke Saruta] Move `stroke` and `fill` attribute of rect elements to css d3c63c8 [Kousuke Saruta] Fixed a little bit bugs a36291b [Kousuke Saruta] Merge branch 'master' of https://github.com/apache/spark into timeline-viewer-feature 28714b6 [Kousuke Saruta] Fixed highlight issue 0dc4278 [Kousuke Saruta] Addressed most of Patrics's feedbacks 8110acf [Kousuke Saruta] Added scroll limit to Job timeline 974a64a [Kousuke Saruta] Removed unused function ee7a7f0 [Kousuke Saruta] Refactored 6a91872 [Kousuke Saruta] Temporary commit 6693f34 [Kousuke Saruta] Added link to job/stage box in the timeline in order to move to corresponding row when we click 8f88222 [Kousuke Saruta] Added job/stage description aeed4b1 [Kousuke Saruta] Removed stage timeline fc1696c [Kousuke Saruta] Merge branch 'timeline-viewer-feature' of github.com:sarutak/spark into timeline-viewer-feature 999ccd4 [Kousuke Saruta] Improved scalability 0fc6a31 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into timeline-viewer-feature 19815ae [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into timeline-viewer-feature 68b7540 [Kousuke Saruta] Merge branch 'timeline-viewer-feature' of github.com:sarutak/spark into timeline-viewer-feature 52b5f0b [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into timeline-viewer-feature dec85db [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into timeline-viewer-feature fcdab7d [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into timeline-viewer-feature dab7cc1 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into timeline-viewer-feature 09cce97 [Kousuke Saruta] Cleanuped 16f82cf [Kousuke Saruta] 
Cleanuped 9fb522e [Kousuke Saruta] Cleanuped d05f2c2 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into timeline-viewer-feature e85e9aa [Kousuke Saruta] Cleanup: Added TimelineViewUtils.scala a76e569 [Kousuke Saruta] Removed unused setting in timeline-view.css 5ce1b21 [Kousuke Saruta] Added vis.min.js, vis.min.css and vis.map to .rat-exclude 082f709 [Kousuke Saruta] Added Timeline-View feature for Applications, Jobs and Stages --- .../apache/spark/ui/static/timeline-view.css | 66 +++++- .../apache/spark/ui/static/timeline-view.js | 71 +++++- .../org/apache/spark/ui/jobs/StagePage.scala | 220 +++++++++++++++++- .../org/apache/spark/ui/jobs/StagesTab.scala | 1 + 4 files changed, 348 insertions(+), 10 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css index d1e6d462b836f..0f400461c5293 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css @@ -24,6 +24,65 @@ div#application-timeline, div#job-timeline { margin-top: 5px; } +#task-assignment-timeline div.legend-area { + width: 574px; +} + +#task-assignment-timeline .legend-area > svg { + width: 100%; + height: 55px; +} + +#task-assignment-timeline div.item.range { + padding: 0px; + height: 26px; + border-width: 0; +} + +.task-assignment-timeline-content { + width: 100%; +} + +.task-assignment-timeline-duration-bar { + width: 100%; + height: 26px; +} + +rect.scheduler-delay-proportion { + fill: #80B1D3; + stroke: #6B94B0; +} + +rect.deserialization-time-proportion { + fill: #FB8072; + stroke: #D26B5F; +} + +rect.shuffle-read-time-proportion { + fill: #FDB462; + stroke: #D39651; +} + +rect.executor-runtime-proportion { + fill: #B3DE69; + stroke: #95B957; +} + +rect.shuffle-write-time-proportion { + fill: #FFED6F; + stroke: #D5C65C; +} + +rect.serialization-time-proportion { + fill: #BC80BD; + stroke: #9D6B9E; +} + +rect.getting-result-time-proportion { + fill: #8DD3C7; + stroke: #75B0A6; +} + .vis.timeline { line-height: 14px; } @@ -178,6 +237,10 @@ tr.corresponding-item-hover > td, tr.corresponding-item-hover > th { display: none; } +#task-assignment-timeline.collapsed { + display: none; +} + .control-panel { margin-bottom: 5px; } @@ -186,7 +249,8 @@ tr.corresponding-item-hover > td, tr.corresponding-item-hover > th { margin: 0; } -span.expand-application-timeline, span.expand-job-timeline { +span.expand-application-timeline, span.expand-job-timeline, +span.expand-task-assignment-timeline { cursor: pointer; } diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index 558beb8a5867f..e1150359bc901 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -133,6 +133,73 @@ function drawJobTimeline(groupArray, eventObjArray, startTime) { }); } +function drawTaskAssignmentTimeline(groupArray, eventObjArray, minLaunchTime, zoomMax) { + var groups = new vis.DataSet(groupArray); + var items = new vis.DataSet(eventObjArray); + var container = $("#task-assignment-timeline")[0] + var options = { + groupOrder: function(a, b) { + return a.value - b.value + }, + editable: false, + align: 'left', + selectable: false, + showCurrentTime: false, + min: minLaunchTime, + zoomable: false, + zoomMax: zoomMax + }; + + var taskTimeline = 
new vis.Timeline(container) + taskTimeline.setOptions(options); + taskTimeline.setGroups(groups); + taskTimeline.setItems(items); + + taskTimeline.on("rangechange", function(prop) { + if (currentDisplayedTooltip !== null) { + $(currentDisplayedTooltip).tooltip("hide"); + } + }); + + function getTaskIdxAndAttempt(selector) { + var taskIdxText = $(selector).attr("data-title"); + var taskIdxAndAttempt = taskIdxText.match("Task (\\d+) \\(attempt (\\d+)"); + var taskIdx = taskIdxAndAttempt[1]; + var taskAttempt = taskIdxAndAttempt[2]; + return taskIdx + "-" + taskAttempt; + } + + // If we zoom up and a box moves away when the corresponding tooltip is shown, + // the tooltip can be remain. + // So, we need to hide tooltips using another mechanism. + var currentDisplayedTooltip = null; + + $("#task-assignment-timeline").on({ + "mouseenter": function() { + var taskIdxAndAttempt = getTaskIdxAndAttempt(this); + $("#task-" + taskIdxAndAttempt).addClass("corresponding-item-hover"); + $(this).tooltip("show"); + currentDisplayedTooltip = this; + }, + "mouseleave" : function() { + var taskIdxAndAttempt = getTaskIdxAndAttempt(this); + $("#task-" + taskIdxAndAttempt).removeClass("corresponding-item-hover"); + $(this).tooltip("hide"); + currentDisplayedTooltip = null; + } + }, ".task-assignment-timeline-content"); + + setupZoomable('#task-assignment-timeline-zoom-lock', taskTimeline); + + $("span.expand-task-assignment-timeline").click(function() { + $("#task-assignment-timeline").toggleClass('collapsed'); + + // Switch the class of the arrow from open to closed. + $(this).find('.expand-task-assignment-timeline-arrow').toggleClass('arrow-open'); + $(this).find('.expand-task-assignment-timeline-arrow').toggleClass('arrow-closed'); + }); +} + function setupExecutorEventAction() { $(".item.box.executor").each(function () { $(this).hover( @@ -147,7 +214,7 @@ function setupExecutorEventAction() { } function setupZoomable(id, timeline) { - $(id + '>input[type="checkbox"]').click(function() { + $(id + ' > input[type="checkbox"]').click(function() { if (this.checked) { timeline.setOptions({zoomable: true}); } else { @@ -155,7 +222,7 @@ function setupZoomable(id, timeline) { } }); - $(id + ">span").click(function() { + $(id + " > span").click(function() { $(this).parent().find('input:checkbox').trigger('click'); }); } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 8f7b1c2f09665..1a75ea62504a0 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -20,6 +20,7 @@ package org.apache.spark.ui.jobs import java.util.Date import javax.servlet.http.HttpServletRequest +import scala.collection.mutable.HashSet import scala.xml.{Elem, Node, Unparsed} import org.apache.commons.lang3.StringEscapeUtils @@ -36,6 +37,35 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { private val progressListener = parent.progressListener private val operationGraphListener = parent.operationGraphListener + private val TIMELINE_LEGEND = { + <div class="legend-area"> + <svg> + { + val legendPairs = List(("scheduler-delay-proportion", "Scheduler Delay"), + ("deserialization-time-proportion", "Task Deserialization Time"), + ("shuffle-read-time-proportion", "Shuffle Read Time"), + ("executor-runtime-proportion", "Executor Computing Time"), + ("shuffle-write-time-proportion", "Shuffle Write Time"), + ("serialization-time-proportion", "Result 
Serialization TIme"), + ("getting-result-time-proportion", "Getting Result Time")) + + legendPairs.zipWithIndex.map { + case ((classAttr, name), index) => + <rect x={5 + (index / 3) * 210 + "px"} y={10 + (index % 3) * 15 + "px"} + width="10px" height="10px" class={classAttr}></rect> + <text x={25 + (index / 3) * 210 + "px"} + y={20 + (index % 3) * 15 + "px"}>{name}</text> + } + } + </svg> + </div> + } + + // TODO: We should consider increasing the number of this parameter over time + // if we find that it's okay. + private val MAX_TIMELINE_TASKS = parent.conf.getInt("spark.ui.timeline.tasks.maximum", 1000) + + def render(request: HttpServletRequest): Seq[Node] = { progressListener.synchronized { val parameterId = request.getParameter("id") @@ -196,7 +226,9 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val accumulableHeaders: Seq[String] = Seq("Accumulable", "Value") def accumulableRow(acc: AccumulableInfo): Elem = <tr><td>{acc.name}</td><td>{acc.value}</td></tr> - val accumulableTable = UIUtils.listingTable(accumulableHeaders, accumulableRow, + val accumulableTable = UIUtils.listingTable( + accumulableHeaders, + accumulableRow, accumulables.values.toSeq) val taskHeadersAndCssClasses: Seq[(String, String)] = @@ -232,10 +264,17 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val unzipped = taskHeadersAndCssClasses.unzip + val currentTime = System.currentTimeMillis() val taskTable = UIUtils.listingTable( unzipped._1, - taskRow(hasAccumulators, stageData.hasInput, stageData.hasOutput, - stageData.hasShuffleRead, stageData.hasShuffleWrite, stageData.hasBytesSpilled), + taskRow( + hasAccumulators, + stageData.hasInput, + stageData.hasOutput, + stageData.hasShuffleRead, + stageData.hasShuffleWrite, + stageData.hasBytesSpilled, + currentTime), tasks, headerClasses = unzipped._2) // Excludes tasks which failed and have incomplete metrics @@ -460,25 +499,192 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { dagViz ++ maybeExpandDagViz ++ showAdditionalMetrics ++ + makeTimeline(stageData.taskData.values.toSeq, currentTime) ++ <h4>Summary Metrics for {numCompleted} Completed Tasks</h4> ++ <div>{summaryTable.getOrElse("No tasks have reported metrics yet.")}</div> ++ <h4>Aggregated Metrics by Executor</h4> ++ executorTable.toNodeSeq ++ maybeAccumulableTable ++ <h4>Tasks</h4> ++ taskTable - UIUtils.headerSparkPage(stageHeader, content, parent, showVisualization = true) } } + def makeTimeline(tasks: Seq[TaskUIData], currentTime: Long): Seq[Node] = { + val executorsSet = new HashSet[(String, String)] + var minLaunchTime = Long.MaxValue + var maxFinishTime = Long.MinValue + + val executorsArrayStr = + tasks.sortBy(-_.taskInfo.launchTime).take(MAX_TIMELINE_TASKS).map { taskUIData => + val taskInfo = taskUIData.taskInfo + val executorId = taskInfo.executorId + val host = taskInfo.host + executorsSet += ((executorId, host)) + + val classNameByStatus = { + if (taskInfo.successful) { + "succeeded" + } else if (taskInfo.failed) { + "failed" + } else if (taskInfo.running) { + "running" + } + } + + val launchTime = taskInfo.launchTime + val finishTime = if (!taskInfo.running) taskInfo.finishTime else currentTime + val totalExecutionTime = finishTime - launchTime + minLaunchTime = launchTime.min(minLaunchTime) + maxFinishTime = launchTime.max(maxFinishTime) + + def toProportion(time: Long) = (time.toDouble / totalExecutionTime * 100).toLong + + val metricsOpt = taskUIData.taskMetrics + val shuffleReadTime = + 
metricsOpt.flatMap(_.shuffleReadMetrics.map(_.fetchWaitTime)).getOrElse(0L) + val shuffleReadTimeProportion = toProportion(shuffleReadTime) + val shuffleWriteTime = + (metricsOpt.flatMap(_.shuffleWriteMetrics + .map(_.shuffleWriteTime)).getOrElse(0L) / 1e6).toLong + val shuffleWriteTimeProportion = toProportion(shuffleWriteTime) + val executorComputingTime = metricsOpt.map(_.executorRunTime).getOrElse(0L) - + shuffleReadTime - shuffleWriteTime + val executorComputingTimeProportion = toProportion(executorComputingTime) + val serializationTime = metricsOpt.map(_.resultSerializationTime).getOrElse(0L) + val serializationTimeProportion = toProportion(serializationTime) + val deserializationTime = metricsOpt.map(_.executorDeserializeTime).getOrElse(0L) + val deserializationTimeProportion = toProportion(deserializationTime) + val gettingResultTime = getGettingResultTime(taskUIData.taskInfo) + val gettingResultTimeProportion = toProportion(gettingResultTime) + val schedulerDelay = totalExecutionTime - + (executorComputingTime + shuffleReadTime + shuffleWriteTime + + serializationTime + deserializationTime + gettingResultTime) + val schedulerDelayProportion = + (100 - executorComputingTimeProportion - shuffleReadTimeProportion - + shuffleWriteTimeProportion - serializationTimeProportion - + deserializationTimeProportion - gettingResultTimeProportion) + + val schedulerDelayProportionPos = 0 + val deserializationTimeProportionPos = + schedulerDelayProportionPos + schedulerDelayProportion + val shuffleReadTimeProportionPos = + deserializationTimeProportionPos + deserializationTimeProportion + val executorRuntimeProportionPos = + shuffleReadTimeProportionPos + shuffleReadTimeProportion + val shuffleWriteTimeProportionPos = + executorRuntimeProportionPos + executorComputingTimeProportion + val serializationTimeProportionPos = + shuffleWriteTimeProportionPos + shuffleWriteTimeProportion + val gettingResultTimeProportionPos = + serializationTimeProportionPos + serializationTimeProportion + + val index = taskInfo.index + val attempt = taskInfo.attempt + val timelineObject = + s""" + { + 'className': 'task task-assignment-timeline-object $classNameByStatus', + 'group': '$executorId', + 'content': '<div class="task-assignment-timeline-content"' + + 'data-toggle="tooltip" data-placement="top"' + + 'data-html="true" data-container="body"' + + 'data-title="${s"Task " + index + " (attempt " + attempt + ")"}<br>' + + 'Status: ${taskInfo.status}<br>' + + 'Launch Time: ${UIUtils.formatDate(new Date(launchTime))}' + + '${ + if (!taskInfo.running) { + s"""<br>Finish Time: ${UIUtils.formatDate(new Date(finishTime))}""" + } else { + "" + } + }' + + '<br>Scheduler Delay: $schedulerDelay ms' + + '<br>Task Deserialization Time: ${UIUtils.formatDuration(deserializationTime)}' + + '<br>Shuffle Read Time: ${UIUtils.formatDuration(shuffleReadTime)}' + + '<br>Executor Computing Time: ${UIUtils.formatDuration(executorComputingTime)}' + + '<br>Shuffle Write Time: ${UIUtils.formatDuration(shuffleWriteTime)}' + + '<br>Result Serialization Time: ${UIUtils.formatDuration(serializationTime)}' + + '<br>Getting Result Time: ${UIUtils.formatDuration(gettingResultTime)}">' + + '<svg class="task-assignment-timeline-duration-bar">' + + '<rect class="scheduler-delay-proportion" ' + + 'x="$schedulerDelayProportionPos%" y="0px" height="26px"' + + 'width="$schedulerDelayProportion%""></rect>' + + '<rect class="deserialization-time-proportion" '+ + 'x="$deserializationTimeProportionPos%" y="0px" height="26px"' + + 
'width="$deserializationTimeProportion%"></rect>' + + '<rect class="shuffle-read-time-proportion" ' + + 'x="$shuffleReadTimeProportionPos%" y="0px" height="26px"' + + 'width="$shuffleReadTimeProportion%"></rect>' + + '<rect class="executor-runtime-proportion" ' + + 'x="$executorRuntimeProportionPos%" y="0px" height="26px"' + + 'width="$executorComputingTimeProportion%"></rect>' + + '<rect class="shuffle-write-time-proportion" ' + + 'x="$shuffleWriteTimeProportionPos%" y="0px" height="26px"' + + 'width="$shuffleWriteTimeProportion%"></rect>' + + '<rect class="serialization-time-proportion" ' + + 'x="$serializationTimeProportionPos%" y="0px" height="26px"' + + 'width="$serializationTimeProportion%"></rect>' + + '<rect class="getting-result-time-proportion" ' + + 'x="$gettingResultTimeProportionPos%" y="0px" height="26px"' + + 'width="$gettingResultTimeProportion%"></rect></svg>', + 'start': new Date($launchTime), + 'end': new Date($finishTime) + } + """ + timelineObject + }.mkString("[", ",", "]") + + val groupArrayStr = executorsSet.map { + case (executorId, host) => + s""" + { + 'id': '$executorId', + 'content': '$executorId / $host', + } + """ + }.mkString("[", ",", "]") + + val maxZoom = maxFinishTime - minLaunchTime + <span class="expand-task-assignment-timeline"> + <span class="expand-task-assignment-timeline-arrow arrow-closed"></span> + <a>Event Timeline</a> + </span> ++ + <div id="task-assignment-timeline" class="collapsed"> + { + if (MAX_TIMELINE_TASKS < tasks.size) { + <strong> + This stage has more than the maximum number of tasks that can be shown in the + visualization! Only the most recent {MAX_TIMELINE_TASKS} tasks + (of {tasks.size} total) are shown. + </strong> + } else { + Seq.empty + } + } + <div class="control-panel"> + <div id="task-assignment-timeline-zoom-lock"> + <input type="checkbox"></input> + <span>Enable zooming</span> + </div> + </div> + {TIMELINE_LEGEND} + </div> ++ + <script type="text/javascript"> + {Unparsed(s"drawTaskAssignmentTimeline(" + + s"$groupArrayStr, $executorsArrayStr, $minLaunchTime, $maxZoom)")} + </script> + } + def taskRow( hasAccumulators: Boolean, hasInput: Boolean, hasOutput: Boolean, hasShuffleRead: Boolean, hasShuffleWrite: Boolean, - hasBytesSpilled: Boolean)(taskData: TaskUIData): Seq[Node] = { + hasBytesSpilled: Boolean, + currentTime: Long)(taskData: TaskUIData): Seq[Node] = { taskData match { case TaskUIData(info, metrics, errorMessage) => - val duration = if (info.status == "RUNNING") info.timeRunning(System.currentTimeMillis()) + val duration = if (info.status == "RUNNING") info.timeRunning(currentTime) else metrics.map(_.executorRunTime).getOrElse(1L) val formatDuration = if (info.status == "RUNNING") UIUtils.formatDuration(duration) else metrics.map(m => UIUtils.formatDuration(m.executorRunTime)).getOrElse("") @@ -542,7 +748,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val diskBytesSpilledSortable = maybeDiskBytesSpilled.map(_.toString).getOrElse("") val diskBytesSpilledReadable = maybeDiskBytesSpilled.map(Utils.bytesToString).getOrElse("") - <tr> + <tr id={"task-" + info.index + "-" + info.attempt}> <td>{info.index}</td> <td>{info.taskId}</td> <td sorttable_customkey={info.attempt.toString}>{ diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala index 55169956d8304..5989f0035b270 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala +++ 
b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala @@ -25,6 +25,7 @@ import org.apache.spark.ui.{SparkUI, SparkUITab} /** Web UI showing progress status of all stages in the given SparkContext. */ private[ui] class StagesTab(parent: SparkUI) extends SparkUITab(parent, "stages") { val sc = parent.sc + val conf = parent.conf val killEnabled = parent.killEnabled val progressListener = parent.jobProgressListener val operationGraphListener = parent.operationGraphListener From 50da9e89161faa0ecdc1feb3ffee6c822a742034 Mon Sep 17 00:00:00 2001 From: qhuang <qian.huang@intel.com> Date: Fri, 15 May 2015 14:06:16 -0700 Subject: [PATCH 196/320] [SPARK-7226] [SPARKR] Support math functions in R DataFrame Author: qhuang <qian.huang@intel.com> Closes #6170 from hqzizania/master and squashes the following commits: f20c39f [qhuang] add tests units and fixes 2a7d121 [qhuang] use a function name more familiar to R users 07aa72e [qhuang] Support math functions in R DataFrame --- R/pkg/NAMESPACE | 23 ++++++++++++++++++++ R/pkg/R/column.R | 36 +++++++++++++++++++++++++++++--- R/pkg/R/generics.R | 20 ++++++++++++++++++ R/pkg/inst/tests/test_sparkSQL.R | 24 +++++++++++++++++++++ 4 files changed, 100 insertions(+), 3 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ba29614e7b179..64ffdcffc9caf 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -59,33 +59,56 @@ exportMethods("arrange", exportClasses("Column") exportMethods("abs", + "acos", "alias", "approxCountDistinct", "asc", + "asin", + "atan", + "atan2", "avg", "cast", + "cbrt", + "ceiling", "contains", + "cos", + "cosh", "countDistinct", "desc", "endsWith", + "exp", + "expm1", + "floor", "getField", "getItem", + "hypot", "isNotNull", "isNull", "last", "like", + "log", + "log10", + "log1p", "lower", "max", "mean", "min", "n", "n_distinct", + "rint", "rlike", + "sign", + "sin", + "sinh", "sqrt", "startsWith", "substr", "sum", "sumDistinct", + "tan", + "tanh", + "toDegrees", + "toRadians", "upper") exportClasses("GroupedData") diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 9a68445ab451a..80e92d3105a36 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -55,12 +55,17 @@ operators <- list( "+" = "plus", "-" = "minus", "*" = "multiply", "/" = "divide", "%%" = "mod", "==" = "equalTo", ">" = "gt", "<" = "lt", "!=" = "notEqual", "<=" = "leq", ">=" = "geq", # we can not override `&&` and `||`, so use `&` and `|` instead - "&" = "and", "|" = "or" #, "!" = "unary_$bang" + "&" = "and", "|" = "or", #, "!" 
= "unary_$bang" + "^" = "pow" ) column_functions1 <- c("asc", "desc", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt", - "first", "last", "lower", "upper", "sumDistinct") + "first", "last", "lower", "upper", "sumDistinct", + "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp", + "expm1", "floor", "log", "log10", "log1p", "rint", "sign", + "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians") +binary_mathfunctions<- c("atan2", "hypot") createOperator <- function(op) { setMethod(op, @@ -76,7 +81,11 @@ createOperator <- function(op) { if (class(e2) == "Column") { e2 <- e2@jc } - callJMethod(e1@jc, operators[[op]], e2) + if (op == "^") { + jc <- callJStatic("org.apache.spark.sql.functions", operators[[op]], e1@jc, e2) + } else { + callJMethod(e1@jc, operators[[op]], e2) + } } column(jc) }) @@ -106,11 +115,29 @@ createStaticFunction <- function(name) { setMethod(name, signature(x = "Column"), function(x) { + if (name == "ceiling") { + name <- "ceil" + } + if (name == "sign") { + name <- "signum" + } jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc) column(jc) }) } +createBinaryMathfunctions <- function(name) { + setMethod(name, + signature(y = "Column"), + function(y, x) { + if (class(x) == "Column") { + x <- x@jc + } + jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x) + column(jc) + }) +} + createMethods <- function() { for (op in names(operators)) { createOperator(op) @@ -124,6 +151,9 @@ createMethods <- function() { for (x in functions) { createStaticFunction(x) } + for (name in binary_mathfunctions) { + createBinaryMathfunctions(name) + } } createMethods() diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 6d2bfb1181e5a..a23d3b217b2fd 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -552,6 +552,10 @@ setGeneric("avg", function(x, ...) { standardGeneric("avg") }) #' @export setGeneric("cast", function(x, dataType) { standardGeneric("cast") }) +#' @rdname column +#' @export +setGeneric("cbrt", function(x) { standardGeneric("cbrt") }) + #' @rdname column #' @export setGeneric("contains", function(x, ...) { standardGeneric("contains") }) @@ -575,6 +579,10 @@ setGeneric("getField", function(x, ...) { standardGeneric("getField") }) #' @export setGeneric("getItem", function(x, ...) { standardGeneric("getItem") }) +#' @rdname column +#' @export +setGeneric("hypot", function(y, x) { standardGeneric("hypot") }) + #' @rdname column #' @export setGeneric("isNull", function(x) { standardGeneric("isNull") }) @@ -603,6 +611,10 @@ setGeneric("n", function(x) { standardGeneric("n") }) #' @export setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") }) +#' @rdname column +#' @export +setGeneric("rint", function(x, ...) { standardGeneric("rint") }) + #' @rdname column #' @export setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) @@ -615,6 +627,14 @@ setGeneric("startsWith", function(x, ...) 
{ standardGeneric("startsWith") }) #' @export setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") }) +#' @rdname column +#' @export +setGeneric("toDegrees", function(x) { standardGeneric("toDegrees") }) + +#' @rdname column +#' @export +setGeneric("toRadians", function(x) { standardGeneric("toRadians") }) + #' @rdname column #' @export setGeneric("upper", function(x) { standardGeneric("upper") }) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 1109e8fdba3fd..3e5658eb5b24b 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -530,6 +530,7 @@ test_that("column operators", { c2 <- (- c + 1 - 2) * 3 / 4.0 c3 <- (c + c2 - c2) * c2 %% c2 c4 <- (c > c2) & (c2 <= c3) | (c == c2) & (c2 != c3) + c5 <- c2 ^ c3 ^ c4 }) test_that("column functions", { @@ -538,6 +539,29 @@ test_that("column functions", { c3 <- lower(c) + upper(c) + first(c) + last(c) c4 <- approxCountDistinct(c) + countDistinct(c) + cast(c, "string") c5 <- n(c) + n_distinct(c) + c5 <- acos(c) + asin(c) + atan(c) + cbrt(c) + c6 <- ceiling(c) + cos(c) + cosh(c) + exp(c) + expm1(c) + c7 <- floor(c) + log(c) + log10(c) + log1p(c) + rint(c) + c8 <- sign(c) + sin(c) + sinh(c) + tan(c) + tanh(c) + c9 <- toDegrees(c) + toRadians(c) +}) + +test_that("column binary mathfunctions", { + lines <- c("{\"a\":1, \"b\":5}", + "{\"a\":2, \"b\":6}", + "{\"a\":3, \"b\":7}", + "{\"a\":4, \"b\":8}") + jsonPathWithDup <- tempfile(pattern="sparkr-test", fileext=".tmp") + writeLines(lines, jsonPathWithDup) + df <- jsonFile(sqlCtx, jsonPathWithDup) + expect_equal(collect(select(df, atan2(df$a, df$b)))[1, "ATAN2(a, b)"], atan2(1, 5)) + expect_equal(collect(select(df, atan2(df$a, df$b)))[2, "ATAN2(a, b)"], atan2(2, 6)) + expect_equal(collect(select(df, atan2(df$a, df$b)))[3, "ATAN2(a, b)"], atan2(3, 7)) + expect_equal(collect(select(df, atan2(df$a, df$b)))[4, "ATAN2(a, b)"], atan2(4, 8)) + expect_equal(collect(select(df, hypot(df$a, df$b)))[1, "HYPOT(a, b)"], sqrt(1^2 + 5^2)) + expect_equal(collect(select(df, hypot(df$a, df$b)))[2, "HYPOT(a, b)"], sqrt(2^2 + 6^2)) + expect_equal(collect(select(df, hypot(df$a, df$b)))[3, "HYPOT(a, b)"], sqrt(3^2 + 7^2)) + expect_equal(collect(select(df, hypot(df$a, df$b)))[4, "HYPOT(a, b)"], sqrt(4^2 + 8^2)) }) test_that("string operators", { From 6e77105e11ff81bfd84561f4e1121111f686df21 Mon Sep 17 00:00:00 2001 From: Iulian Dragos <jaguarul@gmail.com> Date: Fri, 15 May 2015 14:57:29 -0700 Subject: [PATCH 197/320] [SPARK-7677] [STREAMING] Add Kafka modules to the 2.11 build. This is somewhat related to [SPARK-6154](https://issues.apache.org/jira/browse/SPARK-6154), though it only touches Kafka, not the jline dependency for thriftserver. I tested this locally on 2.11 (./run-tests) and everything looked good (I had to disable mima, because `MimaBuild` harcodes 2.10 for the previous version -- that's another PR). Author: Iulian Dragos <jaguarul@gmail.com> Closes #6149 from dragos/issue/spark-2.11-kafka and squashes the following commits: aa15d99 [Iulian Dragos] Add Kafka modules to the 2.11 build. 
--- pom.xml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 91d1d843c762a..86aa0a9fa134c 100644 --- a/pom.xml +++ b/pom.xml @@ -107,6 +107,8 @@ <module>examples</module> <module>repl</module> <module>launcher</module> + <module>external/kafka</module> + <module>external/kafka-assembly</module> </modules> <properties> @@ -1757,10 +1759,6 @@ <jline.version>${scala.version}</jline.version> <jline.groupid>org.scala-lang</jline.groupid> </properties> - <modules> - <module>external/kafka</module> - <module>external/kafka-assembly</module> - </modules> </profile> <profile> From c8696337e2a5878f3171eb574c0a1365d45814c9 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Fri, 15 May 2015 15:05:04 -0700 Subject: [PATCH 198/320] [SPARK-7556] [ML] [DOC] Add user guide for spark.ml Binarizer, including Scala, Java and Python examples JIRA: https://issues.apache.org/jira/browse/SPARK-7556 Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #6116 from viirya/binarizer_doc and squashes the following commits: 40cb677 [Liang-Chi Hsieh] Better print out. 5b7ef1d [Liang-Chi Hsieh] Make examples more clear. 1bf9c09 [Liang-Chi Hsieh] For comments. 6cf8cba [Liang-Chi Hsieh] Add user guide for Binarizer. --- docs/ml-features.md | 84 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/docs/ml-features.md b/docs/ml-features.md index 0cbebcb739b14..5df61dd36a070 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -183,6 +183,90 @@ for words_label in wordsDataFrame.select("words", "label").take(3): </div> </div> +## Binarizer + +Binarization is the process of thresholding numerical features to binary features. As some probabilistic estimators make assumption that the input data is distributed according to [Bernoulli distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution), a binarizer is useful for pre-processing the input data with continuous numerical features. + +A simple [Binarizer](api/scala/index.html#org.apache.spark.ml.feature.Binarizer) class provides this functionality. Besides the common parameters of `inputCol` and `outputCol`, `Binarizer` has the parameter `threshold` used for binarizing continuous numerical features. The features greater than the threshold, will be binarized to 1.0. The features equal to or less than the threshold, will be binarized to 0.0. The example below shows how to binarize numerical features. 
+ +<div class="codetabs"> +<div data-lang="scala" markdown="1"> +{% highlight scala %} +import org.apache.spark.ml.feature.Binarizer +import org.apache.spark.sql.DataFrame + +val data = Array( + (0, 0.1), + (1, 0.8), + (2, 0.2) +) +val dataFrame: DataFrame = sqlContext.createDataFrame(data).toDF("label", "feature") + +val binarizer: Binarizer = new Binarizer() + .setInputCol("feature") + .setOutputCol("binarized_feature") + .setThreshold(0.5) + +val binarizedDataFrame = binarizer.transform(dataFrame) +val binarizedFeatures = binarizedDataFrame.select("binarized_feature") +binarizedFeatures.collect().foreach(println) +{% endhighlight %} +</div> + +<div data-lang="java" markdown="1"> +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.Binarizer; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(0, 0.1), + RowFactory.create(1, 0.8), + RowFactory.create(2, 0.2) +)); +StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("feature", DataTypes.DoubleType, false, Metadata.empty()) +}); +DataFrame continuousDataFrame = jsql.createDataFrame(jrdd, schema); +Binarizer binarizer = new Binarizer() + .setInputCol("feature") + .setOutputCol("binarized_feature") + .setThreshold(0.5); +DataFrame binarizedDataFrame = binarizer.transform(continuousDataFrame); +DataFrame binarizedFeatures = binarizedDataFrame.select("binarized_feature"); +for (Row r : binarizedFeatures.collect()) { + Double binarized_value = r.getDouble(0); + System.out.println(binarized_value); +} +{% endhighlight %} +</div> + +<div data-lang="python" markdown="1"> +{% highlight python %} +from pyspark.ml.feature import Binarizer + +continuousDataFrame = sqlContext.createDataFrame([ + (0, 0.1), + (1, 0.8), + (2, 0.2) +], ["label", "feature"]) +binarizer = Binarizer(threshold=0.5, inputCol="feature", outputCol="binarized_feature") +binarizedDataFrame = binarizer.transform(continuousDataFrame) +binarizedFeatures = binarizedDataFrame.select("binarized_feature") +for binarized_feature, in binarizedFeatures.collect(): + print binarized_feature +{% endhighlight %} +</div> +</div> # Feature Selectors From e74545647684b3047248ca3cfee894ac5378dead Mon Sep 17 00:00:00 2001 From: Kay Ousterhout <kayousterhout@gmail.com> Date: Fri, 15 May 2015 17:45:14 -0700 Subject: [PATCH 199/320] [SPARK-7676] Bug fix and cleanup of stage timeline view cc pwendell sarutak This commit cleans up some unnecessary code, eliminates the feature where when you mouse-over a box in the timeline, the corresponding task is highlighted in the table (because that feature is only useful in the rare case when you have a very small number of tasks, in which case it's easy to figure out the mapping anyway), and fixes a bug where nothing shows up if you try to visualize a stage with only 1 task. 
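The single-task fix appears to come down to how the timeline window is accumulated: the upper bound must be taken from task finish times rather than launch times, otherwise a stage with one task yields an effectively empty window. A reduced, self-contained sketch of that accumulation (hypothetical `TaskSpan` type, not the actual StagePage code, which appears in the diff below) is:

```scala
// Reduced sketch of the timeline window computation over a hypothetical TaskSpan type.
// The key point, matching the one-line change in the diff below, is that the upper
// bound is taken from finishTime; with a launchTime-based bound, a single completed
// task produces a zero-width window and nothing is rendered.
case class TaskSpan(launchTime: Long, finishTime: Long)

def timelineWindow(tasks: Seq[TaskSpan]): (Long, Long) = {
  var minLaunchTime = Long.MaxValue
  var maxFinishTime = Long.MinValue
  tasks.foreach { t =>
    minLaunchTime = math.min(minLaunchTime, t.launchTime)
    maxFinishTime = math.max(maxFinishTime, t.finishTime) // previously used t.launchTime here
  }
  (minLaunchTime, maxFinishTime)
}
```
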
Author: Kay Ousterhout <kayousterhout@gmail.com> Closes #6202 from kayousterhout/SPARK-7676 and squashes the following commits: dfd29d4 [Kay Ousterhout] [SPARK-7676] Bug fix and cleanup of stage timeline view --- .../apache/spark/ui/static/timeline-view.js | 48 +++++++------------ .../org/apache/spark/ui/jobs/StagePage.scala | 19 ++------ 2 files changed, 20 insertions(+), 47 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index e1150359bc901..604c29994145a 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -133,7 +133,7 @@ function drawJobTimeline(groupArray, eventObjArray, startTime) { }); } -function drawTaskAssignmentTimeline(groupArray, eventObjArray, minLaunchTime, zoomMax) { +function drawTaskAssignmentTimeline(groupArray, eventObjArray, minLaunchTime, maxFinishTime) { var groups = new vis.DataSet(groupArray); var items = new vis.DataSet(eventObjArray); var container = $("#task-assignment-timeline")[0] @@ -146,8 +146,8 @@ function drawTaskAssignmentTimeline(groupArray, eventObjArray, minLaunchTime, zo selectable: false, showCurrentTime: false, min: minLaunchTime, - zoomable: false, - zoomMax: zoomMax + max: maxFinishTime, + zoomable: false }; var taskTimeline = new vis.Timeline(container) @@ -155,48 +155,32 @@ function drawTaskAssignmentTimeline(groupArray, eventObjArray, minLaunchTime, zo taskTimeline.setGroups(groups); taskTimeline.setItems(items); - taskTimeline.on("rangechange", function(prop) { - if (currentDisplayedTooltip !== null) { - $(currentDisplayedTooltip).tooltip("hide"); - } - }); - - function getTaskIdxAndAttempt(selector) { - var taskIdxText = $(selector).attr("data-title"); - var taskIdxAndAttempt = taskIdxText.match("Task (\\d+) \\(attempt (\\d+)"); - var taskIdx = taskIdxAndAttempt[1]; - var taskAttempt = taskIdxAndAttempt[2]; - return taskIdx + "-" + taskAttempt; - } - - // If we zoom up and a box moves away when the corresponding tooltip is shown, - // the tooltip can be remain. - // So, we need to hide tooltips using another mechanism. + // If a user zooms while a tooltip is displayed, the user may zoom such that the cursor is no + // longer over the task that the tooltip corresponds to. So, when a user zooms, we should hide + // any currently displayed tooltips. var currentDisplayedTooltip = null; - $("#task-assignment-timeline").on({ "mouseenter": function() { - var taskIdxAndAttempt = getTaskIdxAndAttempt(this); - $("#task-" + taskIdxAndAttempt).addClass("corresponding-item-hover"); - $(this).tooltip("show"); currentDisplayedTooltip = this; }, - "mouseleave" : function() { - var taskIdxAndAttempt = getTaskIdxAndAttempt(this); - $("#task-" + taskIdxAndAttempt).removeClass("corresponding-item-hover"); - $(this).tooltip("hide"); + "mouseleave": function() { currentDisplayedTooltip = null; } }, ".task-assignment-timeline-content"); + taskTimeline.on("rangechange", function(prop) { + if (currentDisplayedTooltip !== null) { + $(currentDisplayedTooltip).tooltip("hide"); + } + }); - setupZoomable('#task-assignment-timeline-zoom-lock', taskTimeline); + setupZoomable("#task-assignment-timeline-zoom-lock", taskTimeline); $("span.expand-task-assignment-timeline").click(function() { - $("#task-assignment-timeline").toggleClass('collapsed'); + $("#task-assignment-timeline").toggleClass("collapsed"); // Switch the class of the arrow from open to closed. 
- $(this).find('.expand-task-assignment-timeline-arrow').toggleClass('arrow-open'); - $(this).find('.expand-task-assignment-timeline-arrow').toggleClass('arrow-closed'); + $(this).find(".expand-task-assignment-timeline-arrow").toggleClass("arrow-open"); + $(this).find(".expand-task-assignment-timeline-arrow").toggleClass("arrow-closed"); }); } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 1a75ea62504a0..31e2e7fba9783 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -521,21 +521,11 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val host = taskInfo.host executorsSet += ((executorId, host)) - val classNameByStatus = { - if (taskInfo.successful) { - "succeeded" - } else if (taskInfo.failed) { - "failed" - } else if (taskInfo.running) { - "running" - } - } - val launchTime = taskInfo.launchTime val finishTime = if (!taskInfo.running) taskInfo.finishTime else currentTime val totalExecutionTime = finishTime - launchTime minLaunchTime = launchTime.min(minLaunchTime) - maxFinishTime = launchTime.max(maxFinishTime) + maxFinishTime = finishTime.max(maxFinishTime) def toProportion(time: Long) = (time.toDouble / totalExecutionTime * 100).toLong @@ -583,7 +573,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val timelineObject = s""" { - 'className': 'task task-assignment-timeline-object $classNameByStatus', + 'className': 'task task-assignment-timeline-object', 'group': '$executorId', 'content': '<div class="task-assignment-timeline-content"' + 'data-toggle="tooltip" data-placement="top"' + @@ -644,7 +634,6 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { """ }.mkString("[", ",", "]") - val maxZoom = maxFinishTime - minLaunchTime <span class="expand-task-assignment-timeline"> <span class="expand-task-assignment-timeline-arrow arrow-closed"></span> <a>Event Timeline</a> @@ -671,7 +660,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { </div> ++ <script type="text/javascript"> {Unparsed(s"drawTaskAssignmentTimeline(" + - s"$groupArrayStr, $executorsArrayStr, $minLaunchTime, $maxZoom)")} + s"$groupArrayStr, $executorsArrayStr, $minLaunchTime, $maxFinishTime)")} </script> } @@ -748,7 +737,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val diskBytesSpilledSortable = maybeDiskBytesSpilled.map(_.toString).getOrElse("") val diskBytesSpilledReadable = maybeDiskBytesSpilled.map(Utils.bytesToString).getOrElse("") - <tr id={"task-" + info.index + "-" + info.attempt}> + <tr> <td>{info.index}</td> <td>{info.taskId}</td> <td sorttable_customkey={info.attempt.toString}>{ From 2c04c8a1aed34cce420b3d30d9e885daa6e03d74 Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Fri, 15 May 2015 18:06:01 -0700 Subject: [PATCH 200/320] [SPARK-7563] OutputCommitCoordinator.stop() should only run on the driver This fixes a bug where an executor that exits can cause the driver's OutputCommitCoordinator to stop. To fix this, we use an `isDriver` flag and check it in `stop()`. See https://issues.apache.org/jira/browse/SPARK-7563 for more details. 
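For clarity, here is a minimal, self-contained sketch of the guard pattern described above, using hypothetical names (`SharedCoordinatorState`, `CoordinatorHandle`) rather than the real coordinator class shown in the diff below:

```scala
// Minimal sketch of the driver-only stop() guard (hypothetical names, not Spark's API).
// "endpoint" stands in for driver-side state that every node can reach; without the
// isDriver check, an exiting executor would tear it down for the whole application.
object SharedCoordinatorState {
  @volatile var endpoint: Option[String] = Some("driver-coordinator-endpoint")
}

class CoordinatorHandle(isDriver: Boolean) {
  def stop(): Unit = {
    if (isDriver) {                                // the guard this patch adds
      SharedCoordinatorState.endpoint = None       // driver: really release the state
    }                                              // executor: stop() becomes a no-op
  }
}

object DriverGuardExample extends App {
  new CoordinatorHandle(isDriver = false).stop()   // executor exit: endpoint survives
  println(SharedCoordinatorState.endpoint)         // Some(driver-coordinator-endpoint)
  new CoordinatorHandle(isDriver = true).stop()    // driver shutdown: endpoint released
  println(SharedCoordinatorState.endpoint)         // None
}
```
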
Author: Josh Rosen <joshrosen@databricks.com> Closes #6197 from JoshRosen/SPARK-7563 and squashes the following commits: 04b2cc5 [Josh Rosen] [SPARK-7563] OutputCommitCoordinator.stop() should only be executed on the driver --- core/src/main/scala/org/apache/spark/SparkEnv.scala | 2 +- .../spark/scheduler/OutputCommitCoordinator.scala | 10 ++++++---- .../spark/scheduler/OutputCommitCoordinatorSuite.scala | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index a5d831c7e68ad..327114542880d 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -379,7 +379,7 @@ object SparkEnv extends Logging { } val outputCommitCoordinator = mockOutputCommitCoordinator.getOrElse { - new OutputCommitCoordinator(conf) + new OutputCommitCoordinator(conf, isDriver) } val outputCommitCoordinatorRef = registerOrLookupEndpoint("OutputCommitCoordinator", new OutputCommitCoordinatorEndpoint(rpcEnv, outputCommitCoordinator)) diff --git a/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala b/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala index 0b1d47cff3746..8321037cdc026 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala @@ -38,7 +38,7 @@ private case class AskPermissionToCommitOutput(stage: Int, task: Long, taskAttem * This class was introduced in SPARK-4879; see that JIRA issue (and the associated pull requests) * for an extensive design discussion. */ -private[spark] class OutputCommitCoordinator(conf: SparkConf) extends Logging { +private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean) extends Logging { // Initialized by SparkEnv var coordinatorRef: Option[RpcEndpointRef] = None @@ -129,9 +129,11 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf) extends Logging { } def stop(): Unit = synchronized { - coordinatorRef.foreach(_ send StopCoordinator) - coordinatorRef = None - authorizedCommittersByStage.clear() + if (isDriver) { + coordinatorRef.foreach(_ send StopCoordinator) + coordinatorRef = None + authorizedCommittersByStage.clear() + } } // Marked private[scheduler] instead of private so this can be mocked in tests diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala index cf97707946706..7078a7a12232a 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala @@ -81,7 +81,7 @@ class OutputCommitCoordinatorSuite extends FunSuite with BeforeAndAfter { conf: SparkConf, isLocal: Boolean, listenerBus: LiveListenerBus): SparkEnv = { - outputCommitCoordinator = spy(new OutputCommitCoordinator(conf)) + outputCommitCoordinator = spy(new OutputCommitCoordinator(conf, isDriver = true)) // Use Mockito.spy() to maintain the default infrastructure everywhere else. // This mocking allows us to control the coordinator responses in test cases. 
SparkEnv.createDriverEnv(conf, isLocal, listenerBus, Some(outputCommitCoordinator)) From cc12a86fb049f2be1f45baf461d202ec356ccf8f Mon Sep 17 00:00:00 2001 From: Ram Sriharsha <rsriharsha@hw11853.local> Date: Fri, 15 May 2015 19:33:20 -0700 Subject: [PATCH 201/320] [SPARK-7575] [ML] [DOC] Example code for OneVsRest Java and Scala examples for OneVsRest. Fixes the base classifier to be Logistic Regression and accepts the configuration parameters of the base classifier. Author: Ram Sriharsha <rsriharsha@hw11853.local> Closes #6115 from harsha2010/SPARK-7575 and squashes the following commits: 87ad3c7 [Ram Sriharsha] extra line f5d9891 [Ram Sriharsha] Merge branch 'master' into SPARK-7575 7076084 [Ram Sriharsha] cleanup dfd660c [Ram Sriharsha] cleanup 8703e4f [Ram Sriharsha] update doc cb23995 [Ram Sriharsha] fix commandline options for JavaOneVsRestExample 69e91f8 [Ram Sriharsha] cleanup 7f4e127 [Ram Sriharsha] cleanup d4c40d0 [Ram Sriharsha] Code Review fixes 461eb38 [Ram Sriharsha] cleanup e0106d9 [Ram Sriharsha] Fix typo 935cf56 [Ram Sriharsha] Try to match Java and Scala Example Commandline options 5323ff9 [Ram Sriharsha] cleanup 196a59a [Ram Sriharsha] cleanup 6adfa0c [Ram Sriharsha] Style Fix 8cfc5d5 [Ram Sriharsha] [SPARK-7575] Example code for OneVsRest --- .../examples/ml/JavaOneVsRestExample.java | 236 ++++++++++++++++++ .../spark/examples/ml/OneVsRestExample.scala | 185 ++++++++++++++ 2 files changed, 421 insertions(+) create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java new file mode 100644 index 0000000000000..75063dbf800d8 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + +import org.apache.commons.cli.*; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.classification.LogisticRegression; +import org.apache.spark.ml.classification.OneVsRest; +import org.apache.spark.ml.classification.OneVsRestModel; +import org.apache.spark.ml.util.MetadataUtils; +import org.apache.spark.mllib.evaluation.MulticlassMetrics; +import org.apache.spark.mllib.linalg.Matrix; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.util.MLUtils; +import org.apache.spark.rdd.RDD; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.StructField; + +/** + * An example runner for Multiclass to Binary Reduction with One Vs Rest. + * The example uses Logistic Regression as the base classifier. All parameters that + * can be specified on the base classifier can be passed in to the runner options. + * Run with + * <pre> + * bin/run-example ml.JavaOneVsRestExample [options] + * </pre> + */ +public class JavaOneVsRestExample { + + private static class Params { + String input; + String testInput = null; + Integer maxIter = 100; + double tol = 1E-6; + boolean fitIntercept = true; + Double regParam = null; + Double elasticNetParam = null; + double fracTest = 0.2; + } + + public static void main(String[] args) { + // parse the arguments + Params params = parse(args); + SparkConf conf = new SparkConf().setAppName("JavaOneVsRestExample"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + // configure the base classifier + LogisticRegression classifier = new LogisticRegression() + .setMaxIter(params.maxIter) + .setTol(params.tol) + .setFitIntercept(params.fitIntercept); + + if (params.regParam != null) { + classifier.setRegParam(params.regParam); + } + if (params.elasticNetParam != null) { + classifier.setElasticNetParam(params.elasticNetParam); + } + + // instantiate the One Vs Rest Classifier + OneVsRest ovr = new OneVsRest().setClassifier(classifier); + + String input = params.input; + RDD<LabeledPoint> inputData = MLUtils.loadLibSVMFile(jsc.sc(), input); + RDD<LabeledPoint> train; + RDD<LabeledPoint> test; + + // compute the train/ test split: if testInput is not provided use part of input + String testInput = params.testInput; + if (testInput != null) { + train = inputData; + // compute the number of features in the training set. 
+ int numFeatures = inputData.first().features().size(); + test = MLUtils.loadLibSVMFile(jsc.sc(), testInput, numFeatures); + } else { + double f = params.fracTest; + RDD<LabeledPoint>[] tmp = inputData.randomSplit(new double[]{1 - f, f}, 12345); + train = tmp[0]; + test = tmp[1]; + } + + // train the multiclass model + DataFrame trainingDataFrame = jsql.createDataFrame(train, LabeledPoint.class); + OneVsRestModel ovrModel = ovr.fit(trainingDataFrame.cache()); + + // score the model on test data + DataFrame testDataFrame = jsql.createDataFrame(test, LabeledPoint.class); + DataFrame predictions = ovrModel.transform(testDataFrame.cache()) + .select("prediction", "label"); + + // obtain metrics + MulticlassMetrics metrics = new MulticlassMetrics(predictions); + StructField predictionColSchema = predictions.schema().apply("prediction"); + Integer numClasses = (Integer) MetadataUtils.getNumClasses(predictionColSchema).get(); + + // compute the false positive rate per label + StringBuilder results = new StringBuilder(); + results.append("label\tfpr\n"); + for (int label = 0; label < numClasses; label++) { + results.append(label); + results.append("\t"); + results.append(metrics.falsePositiveRate((double) label)); + results.append("\n"); + } + + Matrix confusionMatrix = metrics.confusionMatrix(); + // output the Confusion Matrix + System.out.println("Confusion Matrix"); + System.out.println(confusionMatrix); + System.out.println(); + System.out.println(results); + + jsc.stop(); + } + + private static Params parse(String[] args) { + Options options = generateCommandlineOptions(); + CommandLineParser parser = new PosixParser(); + Params params = new Params(); + + try { + CommandLine cmd = parser.parse(options, args); + String value; + if (cmd.hasOption("input")) { + params.input = cmd.getOptionValue("input"); + } + if (cmd.hasOption("maxIter")) { + value = cmd.getOptionValue("maxIter"); + params.maxIter = Integer.parseInt(value); + } + if (cmd.hasOption("tol")) { + value = cmd.getOptionValue("tol"); + params.tol = Double.parseDouble(value); + } + if (cmd.hasOption("fitIntercept")) { + value = cmd.getOptionValue("fitIntercept"); + params.fitIntercept = Boolean.parseBoolean(value); + } + if (cmd.hasOption("regParam")) { + value = cmd.getOptionValue("regParam"); + params.regParam = Double.parseDouble(value); + } + if (cmd.hasOption("elasticNetParam")) { + value = cmd.getOptionValue("elasticNetParam"); + params.elasticNetParam = Double.parseDouble(value); + } + if (cmd.hasOption("testInput")) { + value = cmd.getOptionValue("testInput"); + params.testInput = value; + } + if (cmd.hasOption("fracTest")) { + value = cmd.getOptionValue("fracTest"); + params.fracTest = Double.parseDouble(value); + } + + } catch (ParseException e) { + printHelpAndQuit(options); + } + return params; + } + + private static Options generateCommandlineOptions() { + Option input = OptionBuilder.withArgName("input") + .hasArg() + .isRequired() + .withDescription("input path to labeled examples. This path must be specified") + .create("input"); + Option testInput = OptionBuilder.withArgName("testInput") + .hasArg() + .withDescription("input path to test examples") + .create("testInput"); + Option fracTest = OptionBuilder.withArgName("testInput") + .hasArg() + .withDescription("fraction of data to hold out for testing." + + " If given option testInput, this option is ignored. 
default: 0.2") + .create("fracTest"); + Option maxIter = OptionBuilder.withArgName("maxIter") + .hasArg() + .withDescription("maximum number of iterations for Logistic Regression. default:100") + .create("maxIter"); + Option tol = OptionBuilder.withArgName("tol") + .hasArg() + .withDescription("the convergence tolerance of iterations " + + "for Logistic Regression. default: 1E-6") + .create("tol"); + Option fitIntercept = OptionBuilder.withArgName("fitIntercept") + .hasArg() + .withDescription("fit intercept for logistic regression. default true") + .create("fitIntercept"); + Option regParam = OptionBuilder.withArgName( "regParam" ) + .hasArg() + .withDescription("the regularization parameter for Logistic Regression.") + .create("regParam"); + Option elasticNetParam = OptionBuilder.withArgName("elasticNetParam" ) + .hasArg() + .withDescription("the ElasticNet mixing parameter for Logistic Regression.") + .create("elasticNetParam"); + + Options options = new Options() + .addOption(input) + .addOption(testInput) + .addOption(fracTest) + .addOption(maxIter) + .addOption(tol) + .addOption(fitIntercept) + .addOption(regParam) + .addOption(elasticNetParam); + + return options; + } + + private static void printHelpAndQuit(Options options) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("JavaOneVsRestExample", options); + System.exit(-1); + } +} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala new file mode 100644 index 0000000000000..b99d0a1246011 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import java.util.concurrent.TimeUnit.{NANOSECONDS => NANO} + +import scopt.OptionParser + +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.examples.mllib.AbstractParams +import org.apache.spark.ml.classification.{OneVsRest, LogisticRegression} +import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.mllib.evaluation.MulticlassMetrics +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SQLContext + +/** + * An example runner for Multiclass to Binary Reduction with One Vs Rest. + * The example uses Logistic Regression as the base classifier. All parameters that + * can be specified on the base classifier can be passed in to the runner options. 
+ * Run with + * {{{ + * ./bin/run-example ml.OneVsRestExample [options] + * }}} + * For local mode, run + * {{{ + * ./bin/spark-submit --class org.apache.spark.examples.ml.OneVsRestExample --driver-memory 1g + * [examples JAR path] [options] + * }}} + * If you use it as a template to create your own app, please use `spark-submit` to submit your app. + */ +object OneVsRestExample { + + case class Params private[ml] ( + input: String = null, + testInput: Option[String] = None, + maxIter: Int = 100, + tol: Double = 1E-6, + fitIntercept: Boolean = true, + regParam: Option[Double] = None, + elasticNetParam: Option[Double] = None, + fracTest: Double = 0.2) extends AbstractParams[Params] + + def main(args: Array[String]) { + val defaultParams = Params() + + val parser = new OptionParser[Params]("OneVsRest Example") { + head("OneVsRest Example: multiclass to binary reduction using OneVsRest") + opt[String]("input") + .text("input path to labeled examples. This path must be specified") + .required() + .action((x, c) => c.copy(input = x)) + opt[Double]("fracTest") + .text(s"fraction of data to hold out for testing. If given option testInput, " + + s"this option is ignored. default: ${defaultParams.fracTest}") + .action((x, c) => c.copy(fracTest = x)) + opt[String]("testInput") + .text("input path to test dataset. If given, option fracTest is ignored") + .action((x,c) => c.copy(testInput = Some(x))) + opt[Int]("maxIter") + .text(s"maximum number of iterations for Logistic Regression." + + s" default: ${defaultParams.maxIter}") + .action((x, c) => c.copy(maxIter = x)) + opt[Double]("tol") + .text(s"the convergence tolerance of iterations for Logistic Regression." + + s" default: ${defaultParams.tol}") + .action((x, c) => c.copy(tol = x)) + opt[Boolean]("fitIntercept") + .text(s"fit intercept for Logistic Regression." + + s" default: ${defaultParams.fitIntercept}") + .action((x, c) => c.copy(fitIntercept = x)) + opt[Double]("regParam") + .text(s"the regularization parameter for Logistic Regression.") + .action((x,c) => c.copy(regParam = Some(x))) + opt[Double]("elasticNetParam") + .text(s"the ElasticNet mixing parameter for Logistic Regression.") + .action((x,c) => c.copy(elasticNetParam = Some(x))) + checkConfig { params => + if (params.fracTest < 0 || params.fracTest >= 1) { + failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1).") + } else { + success + } + } + } + parser.parse(args, defaultParams).map { params => + run(params) + }.getOrElse { + sys.exit(1) + } + } + + private def run(params: Params) { + val conf = new SparkConf().setAppName(s"OneVsRestExample with $params") + val sc = new SparkContext(conf) + val inputData = MLUtils.loadLibSVMFile(sc, params.input) + val sqlContext = new SQLContext(sc) + import sqlContext.implicits._ + + // compute the train/test split: if testInput is not provided use part of input. + val data = params.testInput match { + case Some(t) => { + // compute the number of features in the training set. 
+ val numFeatures = inputData.first().features.size + val testData = MLUtils.loadLibSVMFile(sc, t, numFeatures) + Array[RDD[LabeledPoint]](inputData, testData) + } + case None => { + val f = params.fracTest + inputData.randomSplit(Array(1 - f, f), seed = 12345) + } + } + val Array(train, test) = data.map(_.toDF().cache()) + + // instantiate the base classifier + val classifier = new LogisticRegression() + .setMaxIter(params.maxIter) + .setTol(params.tol) + .setFitIntercept(params.fitIntercept) + + // Set regParam, elasticNetParam if specified in params + params.regParam.foreach(classifier.setRegParam) + params.elasticNetParam.foreach(classifier.setElasticNetParam) + + // instantiate the One Vs Rest Classifier. + + val ovr = new OneVsRest() + ovr.setClassifier(classifier) + + // train the multiclass model. + val (trainingDuration, ovrModel) = time(ovr.fit(train)) + + // score the model on test data. + val (predictionDuration, predictions) = time(ovrModel.transform(test)) + + // evaluate the model + val predictionsAndLabels = predictions.select("prediction", "label") + .map(row => (row.getDouble(0), row.getDouble(1))) + + val metrics = new MulticlassMetrics(predictionsAndLabels) + + val confusionMatrix = metrics.confusionMatrix + + // compute the false positive rate per label + val predictionColSchema = predictions.schema("prediction") + val numClasses = MetadataUtils.getNumClasses(predictionColSchema).get + val fprs = Range(0, numClasses).map(p => (p, metrics.falsePositiveRate(p.toDouble))) + + println(s" Training Time ${trainingDuration} sec\n") + + println(s" Prediction Time ${predictionDuration} sec\n") + + println(s" Confusion Matrix\n ${confusionMatrix.toString}\n") + + println("label\tfpr") + + println(fprs.map {case (label, fpr) => label + "\t" + fpr}.mkString("\n")) + + sc.stop() + } + + private def time[R](block: => R): (Long, R) = { + val t0 = System.nanoTime() + val result = block // call-by-name + val t1 = System.nanoTime() + (NANO.toSeconds(t1 - t0), result) + } +} From adfd366814499c0540a15dd6017091ba8c0f05da Mon Sep 17 00:00:00 2001 From: Davies Liu <davies@databricks.com> Date: Fri, 15 May 2015 20:05:26 -0700 Subject: [PATCH 202/320] [SPARK-7073] [SQL] [PySpark] Clean up SQL data type hierarchy in Python Author: Davies Liu <davies@databricks.com> Closes #6206 from davies/sql_type and squashes the following commits: 33d6860 [Davies Liu] [SPARK-7073] [SQL] [PySpark] Clean up SQL data type hierarchy in Python --- python/pyspark/sql/_types.py | 76 ++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/python/pyspark/sql/_types.py b/python/pyspark/sql/_types.py index 629c3a94513b8..9e7e9f04bc35d 100644 --- a/python/pyspark/sql/_types.py +++ b/python/pyspark/sql/_types.py @@ -73,56 +73,74 @@ def json(self): # This singleton pattern does not work with pickle, you will get # another object after pickle and unpickle -class PrimitiveTypeSingleton(type): - """Metaclass for PrimitiveType""" +class DataTypeSingleton(type): + """Metaclass for DataType""" _instances = {} def __call__(cls): if cls not in cls._instances: - cls._instances[cls] = super(PrimitiveTypeSingleton, cls).__call__() + cls._instances[cls] = super(DataTypeSingleton, cls).__call__() return cls._instances[cls] -class PrimitiveType(DataType): - """Spark SQL PrimitiveType""" +class NullType(DataType): + """Null type. - __metaclass__ = PrimitiveTypeSingleton + The data type representing None, used for the types that cannot be inferred. 
+ """ + __metaclass__ = DataTypeSingleton -class NullType(PrimitiveType): - """Null type. - The data type representing None, used for the types that cannot be inferred. +class AtomicType(DataType): + """An internal type used to represent everything that is not + null, UDTs, arrays, structs, and maps.""" + + __metaclass__ = DataTypeSingleton + + +class NumericType(AtomicType): + """Numeric data types. """ -class StringType(PrimitiveType): +class IntegralType(NumericType): + """Integral data types. + """ + + +class FractionalType(NumericType): + """Fractional data types. + """ + + +class StringType(AtomicType): """String data type. """ -class BinaryType(PrimitiveType): +class BinaryType(AtomicType): """Binary (byte array) data type. """ -class BooleanType(PrimitiveType): +class BooleanType(AtomicType): """Boolean data type. """ -class DateType(PrimitiveType): +class DateType(AtomicType): """Date (datetime.date) data type. """ -class TimestampType(PrimitiveType): +class TimestampType(AtomicType): """Timestamp (datetime.datetime) data type. """ -class DecimalType(DataType): +class DecimalType(FractionalType): """Decimal (decimal.Decimal) data type. """ @@ -150,31 +168,31 @@ def __repr__(self): return "DecimalType()" -class DoubleType(PrimitiveType): +class DoubleType(FractionalType): """Double data type, representing double precision floats. """ -class FloatType(PrimitiveType): +class FloatType(FractionalType): """Float data type, representing single precision floats. """ -class ByteType(PrimitiveType): +class ByteType(IntegralType): """Byte data type, i.e. a signed integer in a single byte. """ def simpleString(self): return 'tinyint' -class IntegerType(PrimitiveType): +class IntegerType(IntegralType): """Int data type, i.e. a signed 32-bit integer. """ def simpleString(self): return 'int' -class LongType(PrimitiveType): +class LongType(IntegralType): """Long data type, i.e. a signed 64-bit integer. If the values are beyond the range of [-9223372036854775808, 9223372036854775807], @@ -184,7 +202,7 @@ def simpleString(self): return 'bigint' -class ShortType(PrimitiveType): +class ShortType(IntegralType): """Short data type, i.e. a signed 16-bit integer. """ def simpleString(self): @@ -426,11 +444,9 @@ def __eq__(self, other): return type(self) == type(other) -_all_primitive_types = dict((v.typeName(), v) - for v in list(globals().values()) - if (type(v) is type or type(v) is PrimitiveTypeSingleton) - and v.__base__ == PrimitiveType) - +_atomic_types = [StringType, BinaryType, BooleanType, DecimalType, FloatType, DoubleType, + ByteType, ShortType, IntegerType, LongType, DateType, TimestampType] +_all_atomic_types = dict((t.typeName(), t) for t in _atomic_types) _all_complex_types = dict((v.typeName(), v) for v in [ArrayType, MapType, StructType]) @@ -444,7 +460,7 @@ def _parse_datatype_json_string(json_string): ... scala_datatype = sqlContext._ssql_ctx.parseDataType(datatype.json()) ... python_datatype = _parse_datatype_json_string(scala_datatype.json()) ... assert datatype == python_datatype - >>> for cls in _all_primitive_types.values(): + >>> for cls in _all_atomic_types.values(): ... check_datatype(cls()) >>> # Simple ArrayType. 
@@ -494,8 +510,8 @@ def _parse_datatype_json_string(json_string): def _parse_datatype_json_value(json_value): if not isinstance(json_value, dict): - if json_value in _all_primitive_types.keys(): - return _all_primitive_types[json_value]() + if json_value in _all_atomic_types.keys(): + return _all_atomic_types[json_value]() elif json_value == 'decimal': return DecimalType() elif _FIXED_DECIMAL.match(json_value): @@ -1125,7 +1141,7 @@ def Dict(d): return lambda datum: dataType.deserialize(datum) elif not isinstance(dataType, StructType): - # no wrapper for primitive types + # no wrapper for atomic types return lambda x: x class Row(tuple): From d7b69946cb21cd2781c9ad3e691e54b28efbbf3d Mon Sep 17 00:00:00 2001 From: Davies Liu <davies@databricks.com> Date: Fri, 15 May 2015 20:09:15 -0700 Subject: [PATCH 203/320] [SPARK-7543] [SQL] [PySpark] split dataframe.py into multiple files dataframe.py is splited into column.py, group.py and dataframe.py: ``` 360 column.py 1223 dataframe.py 183 group.py ``` Author: Davies Liu <davies@databricks.com> Closes #6201 from davies/split_df and squashes the following commits: fc8f5ab [Davies Liu] split dataframe.py into multiple files --- python/pyspark/sql/__init__.py | 5 +- python/pyspark/sql/column.py | 360 +++++++++++++++++++++++++ python/pyspark/sql/dataframe.py | 449 +------------------------------- python/pyspark/sql/functions.py | 2 +- python/pyspark/sql/group.py | 183 +++++++++++++ python/run-tests | 2 + 6 files changed, 552 insertions(+), 449 deletions(-) create mode 100644 python/pyspark/sql/column.py create mode 100644 python/pyspark/sql/group.py diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py index 7192c89b3dc7f..19805e291e91b 100644 --- a/python/pyspark/sql/__init__.py +++ b/python/pyspark/sql/__init__.py @@ -55,8 +55,9 @@ from pyspark.sql.types import Row from pyspark.sql.context import SQLContext, HiveContext -from pyspark.sql.dataframe import DataFrame, GroupedData, Column, SchemaRDD, DataFrameNaFunctions -from pyspark.sql.dataframe import DataFrameStatFunctions +from pyspark.sql.column import Column +from pyspark.sql.dataframe import DataFrame, SchemaRDD, DataFrameNaFunctions, DataFrameStatFunctions +from pyspark.sql.group import GroupedData __all__ = [ 'SQLContext', 'HiveContext', 'DataFrame', 'GroupedData', 'Column', 'Row', diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py new file mode 100644 index 0000000000000..fc7ad674daa5b --- /dev/null +++ b/python/pyspark/sql/column.py @@ -0,0 +1,360 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import sys + +if sys.version >= '3': + basestring = str + long = int + +from pyspark.context import SparkContext +from pyspark.rdd import ignore_unicode_prefix +from pyspark.sql.types import * + +__all__ = ["DataFrame", "Column", "SchemaRDD", "DataFrameNaFunctions", + "DataFrameStatFunctions"] + + +def _create_column_from_literal(literal): + sc = SparkContext._active_spark_context + return sc._jvm.functions.lit(literal) + + +def _create_column_from_name(name): + sc = SparkContext._active_spark_context + return sc._jvm.functions.col(name) + + +def _to_java_column(col): + if isinstance(col, Column): + jcol = col._jc + else: + jcol = _create_column_from_name(col) + return jcol + + +def _to_seq(sc, cols, converter=None): + """ + Convert a list of Column (or names) into a JVM Seq of Column. + + An optional `converter` could be used to convert items in `cols` + into JVM Column objects. + """ + if converter: + cols = [converter(c) for c in cols] + return sc._jvm.PythonUtils.toSeq(cols) + + +def _unary_op(name, doc="unary operator"): + """ Create a method for given unary operator """ + def _(self): + jc = getattr(self._jc, name)() + return Column(jc) + _.__doc__ = doc + return _ + + +def _func_op(name, doc=''): + def _(self): + sc = SparkContext._active_spark_context + jc = getattr(sc._jvm.functions, name)(self._jc) + return Column(jc) + _.__doc__ = doc + return _ + + +def _bin_op(name, doc="binary operator"): + """ Create a method for given binary operator + """ + def _(self, other): + jc = other._jc if isinstance(other, Column) else other + njc = getattr(self._jc, name)(jc) + return Column(njc) + _.__doc__ = doc + return _ + + +def _reverse_op(name, doc="binary operator"): + """ Create a method for binary operator (this object is on right side) + """ + def _(self, other): + jother = _create_column_from_literal(other) + jc = getattr(jother, name)(self._jc) + return Column(jc) + _.__doc__ = doc + return _ + + +class Column(object): + + """ + A column in a DataFrame. + + :class:`Column` instances can be created by:: + + # 1. Select a column out of a DataFrame + + df.colName + df["colName"] + + # 2. Create from an expression + df.colName + 1 + 1 / df.colName + """ + + def __init__(self, jc): + self._jc = jc + + # arithmetic operators + __neg__ = _func_op("negate") + __add__ = _bin_op("plus") + __sub__ = _bin_op("minus") + __mul__ = _bin_op("multiply") + __div__ = _bin_op("divide") + __truediv__ = _bin_op("divide") + __mod__ = _bin_op("mod") + __radd__ = _bin_op("plus") + __rsub__ = _reverse_op("minus") + __rmul__ = _bin_op("multiply") + __rdiv__ = _reverse_op("divide") + __rtruediv__ = _reverse_op("divide") + __rmod__ = _reverse_op("mod") + + # logistic operators + __eq__ = _bin_op("equalTo") + __ne__ = _bin_op("notEqual") + __lt__ = _bin_op("lt") + __le__ = _bin_op("leq") + __ge__ = _bin_op("geq") + __gt__ = _bin_op("gt") + + # `and`, `or`, `not` cannot be overloaded in Python, + # so use bitwise operators as boolean operators + __and__ = _bin_op('and') + __or__ = _bin_op('or') + __invert__ = _func_op('not') + __rand__ = _bin_op("and") + __ror__ = _bin_op("or") + + # container operators + __contains__ = _bin_op("contains") + __getitem__ = _bin_op("apply") + + # bitwise operators + bitwiseOR = _bin_op("bitwiseOR") + bitwiseAND = _bin_op("bitwiseAND") + bitwiseXOR = _bin_op("bitwiseXOR") + + def getItem(self, key): + """An expression that gets an item at position `ordinal` out of a list, + or gets an item by key out of a dict. 
+ + >>> df = sc.parallelize([([1, 2], {"key": "value"})]).toDF(["l", "d"]) + >>> df.select(df.l.getItem(0), df.d.getItem("key")).show() + +----+------+ + |l[0]|d[key]| + +----+------+ + | 1| value| + +----+------+ + >>> df.select(df.l[0], df.d["key"]).show() + +----+------+ + |l[0]|d[key]| + +----+------+ + | 1| value| + +----+------+ + """ + return self[key] + + def getField(self, name): + """An expression that gets a field by name in a StructField. + + >>> from pyspark.sql import Row + >>> df = sc.parallelize([Row(r=Row(a=1, b="b"))]).toDF() + >>> df.select(df.r.getField("b")).show() + +----+ + |r[b]| + +----+ + | b| + +----+ + >>> df.select(df.r.a).show() + +----+ + |r[a]| + +----+ + | 1| + +----+ + """ + return self[name] + + def __getattr__(self, item): + if item.startswith("__"): + raise AttributeError(item) + return self.getField(item) + + # string methods + rlike = _bin_op("rlike") + like = _bin_op("like") + startswith = _bin_op("startsWith") + endswith = _bin_op("endsWith") + + @ignore_unicode_prefix + def substr(self, startPos, length): + """ + Return a :class:`Column` which is a substring of the column + + :param startPos: start position (int or Column) + :param length: length of the substring (int or Column) + + >>> df.select(df.name.substr(1, 3).alias("col")).collect() + [Row(col=u'Ali'), Row(col=u'Bob')] + """ + if type(startPos) != type(length): + raise TypeError("Can not mix the type") + if isinstance(startPos, (int, long)): + jc = self._jc.substr(startPos, length) + elif isinstance(startPos, Column): + jc = self._jc.substr(startPos._jc, length._jc) + else: + raise TypeError("Unexpected type: %s" % type(startPos)) + return Column(jc) + + __getslice__ = substr + + @ignore_unicode_prefix + def inSet(self, *cols): + """ A boolean expression that is evaluated to true if the value of this + expression is contained by the evaluated values of the arguments. + + >>> df[df.name.inSet("Bob", "Mike")].collect() + [Row(age=5, name=u'Bob')] + >>> df[df.age.inSet([1, 2, 3])].collect() + [Row(age=2, name=u'Alice')] + """ + if len(cols) == 1 and isinstance(cols[0], (list, set)): + cols = cols[0] + cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols] + sc = SparkContext._active_spark_context + jc = getattr(self._jc, "in")(_to_seq(sc, cols)) + return Column(jc) + + # order + asc = _unary_op("asc", "Returns a sort expression based on the" + " ascending order of the given column name.") + desc = _unary_op("desc", "Returns a sort expression based on the" + " descending order of the given column name.") + + isNull = _unary_op("isNull", "True if the current expression is null.") + isNotNull = _unary_op("isNotNull", "True if the current expression is not null.") + + def alias(self, *alias): + """Returns this column aliased with a new name or names (in the case of expressions that + return more than one column, such as explode). 
+ + >>> df.select(df.age.alias("age2")).collect() + [Row(age2=2), Row(age2=5)] + """ + + if len(alias) == 1: + return Column(getattr(self._jc, "as")(alias[0])) + else: + sc = SparkContext._active_spark_context + return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias)))) + + @ignore_unicode_prefix + def cast(self, dataType): + """ Convert the column into type `dataType` + + >>> df.select(df.age.cast("string").alias('ages')).collect() + [Row(ages=u'2'), Row(ages=u'5')] + >>> df.select(df.age.cast(StringType()).alias('ages')).collect() + [Row(ages=u'2'), Row(ages=u'5')] + """ + if isinstance(dataType, basestring): + jc = self._jc.cast(dataType) + elif isinstance(dataType, DataType): + sc = SparkContext._active_spark_context + ssql_ctx = sc._jvm.SQLContext(sc._jsc.sc()) + jdt = ssql_ctx.parseDataType(dataType.json()) + jc = self._jc.cast(jdt) + else: + raise TypeError("unexpected type: %s" % type(dataType)) + return Column(jc) + + @ignore_unicode_prefix + def between(self, lowerBound, upperBound): + """ A boolean expression that is evaluated to true if the value of this + expression is between the given columns. + """ + return (self >= lowerBound) & (self <= upperBound) + + @ignore_unicode_prefix + def when(self, condition, value): + """Evaluates a list of conditions and returns one of multiple possible result expressions. + If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. + + See :func:`pyspark.sql.functions.when` for example usage. + + :param condition: a boolean :class:`Column` expression. + :param value: a literal value, or a :class:`Column` expression. + + """ + sc = SparkContext._active_spark_context + if not isinstance(condition, Column): + raise TypeError("condition should be a Column") + v = value._jc if isinstance(value, Column) else value + jc = sc._jvm.functions.when(condition._jc, v) + return Column(jc) + + @ignore_unicode_prefix + def otherwise(self, value): + """Evaluates a list of conditions and returns one of multiple possible result expressions. + If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. + + See :func:`pyspark.sql.functions.when` for example usage. + + :param value: a literal value, or a :class:`Column` expression. 
+ """ + v = value._jc if isinstance(value, Column) else value + jc = self._jc.otherwise(value) + return Column(jc) + + def __repr__(self): + return 'Column<%s>' % self._jc.toString().encode('utf8') + + +def _test(): + import doctest + from pyspark.context import SparkContext + from pyspark.sql import SQLContext + import pyspark.sql.column + globs = pyspark.sql.column.__dict__.copy() + sc = SparkContext('local[4]', 'PythonTest') + globs['sc'] = sc + globs['sqlContext'] = SQLContext(sc) + globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \ + .toDF(StructType([StructField('age', IntegerType()), + StructField('name', StringType())])) + + (failure_count, test_count) = doctest.testmod( + pyspark.sql.column, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + globs['sc'].stop() + if failure_count: + exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 2ed95ac8e2505..96d927b9ba35c 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -25,17 +25,15 @@ else: from itertools import imap as map -from pyspark.context import SparkContext from pyspark.rdd import RDD, _load_from_socket, ignore_unicode_prefix from pyspark.serializers import BatchedSerializer, PickleSerializer, UTF8Deserializer from pyspark.storagelevel import StorageLevel from pyspark.traceback_utils import SCCallSiteSync from pyspark.sql.types import * from pyspark.sql.types import _create_cls, _parse_datatype_json_string +from pyspark.sql.column import Column, _to_seq, _to_java_column - -__all__ = ["DataFrame", "GroupedData", "Column", "SchemaRDD", "DataFrameNaFunctions", - "DataFrameStatFunctions"] +__all__ = ["DataFrame", "SchemaRDD", "DataFrameNaFunctions", "DataFrameStatFunctions"] class DataFrame(object): @@ -757,6 +755,7 @@ def groupBy(self, *cols): [Row(name=u'Bob', age=5, count=1), Row(name=u'Alice', age=2, count=1)] """ jdf = self._jdf.groupBy(self._jcols(*cols)) + from pyspark.sql.group import GroupedData return GroupedData(jdf, self.sql_ctx) def agg(self, *exprs): @@ -1141,169 +1140,6 @@ class SchemaRDD(DataFrame): """ -def dfapi(f): - def _api(self): - name = f.__name__ - jdf = getattr(self._jdf, name)() - return DataFrame(jdf, self.sql_ctx) - _api.__name__ = f.__name__ - _api.__doc__ = f.__doc__ - return _api - - -def df_varargs_api(f): - def _api(self, *args): - name = f.__name__ - jdf = getattr(self._jdf, name)(_to_seq(self.sql_ctx._sc, args)) - return DataFrame(jdf, self.sql_ctx) - _api.__name__ = f.__name__ - _api.__doc__ = f.__doc__ - return _api - - -class GroupedData(object): - """ - A set of methods for aggregations on a :class:`DataFrame`, - created by :func:`DataFrame.groupBy`. - """ - - def __init__(self, jdf, sql_ctx): - self._jdf = jdf - self.sql_ctx = sql_ctx - - @ignore_unicode_prefix - def agg(self, *exprs): - """Compute aggregates and returns the result as a :class:`DataFrame`. - - The available aggregate functions are `avg`, `max`, `min`, `sum`, `count`. - - If ``exprs`` is a single :class:`dict` mapping from string to string, then the key - is the column to perform aggregation on, and the value is the aggregate function. - - Alternatively, ``exprs`` can also be a list of aggregate :class:`Column` expressions. - - :param exprs: a dict mapping from column name (string) to aggregate functions (string), - or a list of :class:`Column`. 
- - >>> gdf = df.groupBy(df.name) - >>> gdf.agg({"*": "count"}).collect() - [Row(name=u'Alice', COUNT(1)=1), Row(name=u'Bob', COUNT(1)=1)] - - >>> from pyspark.sql import functions as F - >>> gdf.agg(F.min(df.age)).collect() - [Row(name=u'Alice', MIN(age)=2), Row(name=u'Bob', MIN(age)=5)] - """ - assert exprs, "exprs should not be empty" - if len(exprs) == 1 and isinstance(exprs[0], dict): - jdf = self._jdf.agg(exprs[0]) - else: - # Columns - assert all(isinstance(c, Column) for c in exprs), "all exprs should be Column" - jdf = self._jdf.agg(exprs[0]._jc, - _to_seq(self.sql_ctx._sc, [c._jc for c in exprs[1:]])) - return DataFrame(jdf, self.sql_ctx) - - @dfapi - def count(self): - """Counts the number of records for each group. - - >>> df.groupBy(df.age).count().collect() - [Row(age=2, count=1), Row(age=5, count=1)] - """ - - @df_varargs_api - def mean(self, *cols): - """Computes average values for each numeric columns for each group. - - :func:`mean` is an alias for :func:`avg`. - - :param cols: list of column names (string). Non-numeric columns are ignored. - - >>> df.groupBy().mean('age').collect() - [Row(AVG(age)=3.5)] - >>> df3.groupBy().mean('age', 'height').collect() - [Row(AVG(age)=3.5, AVG(height)=82.5)] - """ - - @df_varargs_api - def avg(self, *cols): - """Computes average values for each numeric columns for each group. - - :func:`mean` is an alias for :func:`avg`. - - :param cols: list of column names (string). Non-numeric columns are ignored. - - >>> df.groupBy().avg('age').collect() - [Row(AVG(age)=3.5)] - >>> df3.groupBy().avg('age', 'height').collect() - [Row(AVG(age)=3.5, AVG(height)=82.5)] - """ - - @df_varargs_api - def max(self, *cols): - """Computes the max value for each numeric columns for each group. - - >>> df.groupBy().max('age').collect() - [Row(MAX(age)=5)] - >>> df3.groupBy().max('age', 'height').collect() - [Row(MAX(age)=5, MAX(height)=85)] - """ - - @df_varargs_api - def min(self, *cols): - """Computes the min value for each numeric column for each group. - - :param cols: list of column names (string). Non-numeric columns are ignored. - - >>> df.groupBy().min('age').collect() - [Row(MIN(age)=2)] - >>> df3.groupBy().min('age', 'height').collect() - [Row(MIN(age)=2, MIN(height)=80)] - """ - - @df_varargs_api - def sum(self, *cols): - """Compute the sum for each numeric columns for each group. - - :param cols: list of column names (string). Non-numeric columns are ignored. - - >>> df.groupBy().sum('age').collect() - [Row(SUM(age)=7)] - >>> df3.groupBy().sum('age', 'height').collect() - [Row(SUM(age)=7, SUM(height)=165)] - """ - - -def _create_column_from_literal(literal): - sc = SparkContext._active_spark_context - return sc._jvm.functions.lit(literal) - - -def _create_column_from_name(name): - sc = SparkContext._active_spark_context - return sc._jvm.functions.col(name) - - -def _to_java_column(col): - if isinstance(col, Column): - jcol = col._jc - else: - jcol = _create_column_from_name(col) - return jcol - - -def _to_seq(sc, cols, converter=None): - """ - Convert a list of Column (or names) into a JVM Seq of Column. - - An optional `converter` could be used to convert items in `cols` - into JVM Column objects. - """ - if converter: - cols = [converter(c) for c in cols] - return sc._jvm.PythonUtils.toSeq(cols) - - def _to_scala_map(sc, jm): """ Convert a dict into a JVM Map. 
@@ -1311,282 +1147,6 @@ def _to_scala_map(sc, jm): return sc._jvm.PythonUtils.toScalaMap(jm) -def _unary_op(name, doc="unary operator"): - """ Create a method for given unary operator """ - def _(self): - jc = getattr(self._jc, name)() - return Column(jc) - _.__doc__ = doc - return _ - - -def _func_op(name, doc=''): - def _(self): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.functions, name)(self._jc) - return Column(jc) - _.__doc__ = doc - return _ - - -def _bin_op(name, doc="binary operator"): - """ Create a method for given binary operator - """ - def _(self, other): - jc = other._jc if isinstance(other, Column) else other - njc = getattr(self._jc, name)(jc) - return Column(njc) - _.__doc__ = doc - return _ - - -def _reverse_op(name, doc="binary operator"): - """ Create a method for binary operator (this object is on right side) - """ - def _(self, other): - jother = _create_column_from_literal(other) - jc = getattr(jother, name)(self._jc) - return Column(jc) - _.__doc__ = doc - return _ - - -class Column(object): - - """ - A column in a DataFrame. - - :class:`Column` instances can be created by:: - - # 1. Select a column out of a DataFrame - - df.colName - df["colName"] - - # 2. Create from an expression - df.colName + 1 - 1 / df.colName - """ - - def __init__(self, jc): - self._jc = jc - - # arithmetic operators - __neg__ = _func_op("negate") - __add__ = _bin_op("plus") - __sub__ = _bin_op("minus") - __mul__ = _bin_op("multiply") - __div__ = _bin_op("divide") - __truediv__ = _bin_op("divide") - __mod__ = _bin_op("mod") - __radd__ = _bin_op("plus") - __rsub__ = _reverse_op("minus") - __rmul__ = _bin_op("multiply") - __rdiv__ = _reverse_op("divide") - __rtruediv__ = _reverse_op("divide") - __rmod__ = _reverse_op("mod") - - # logistic operators - __eq__ = _bin_op("equalTo") - __ne__ = _bin_op("notEqual") - __lt__ = _bin_op("lt") - __le__ = _bin_op("leq") - __ge__ = _bin_op("geq") - __gt__ = _bin_op("gt") - - # `and`, `or`, `not` cannot be overloaded in Python, - # so use bitwise operators as boolean operators - __and__ = _bin_op('and') - __or__ = _bin_op('or') - __invert__ = _func_op('not') - __rand__ = _bin_op("and") - __ror__ = _bin_op("or") - - # container operators - __contains__ = _bin_op("contains") - __getitem__ = _bin_op("apply") - - # bitwise operators - bitwiseOR = _bin_op("bitwiseOR") - bitwiseAND = _bin_op("bitwiseAND") - bitwiseXOR = _bin_op("bitwiseXOR") - - def getItem(self, key): - """An expression that gets an item at position `ordinal` out of a list, - or gets an item by key out of a dict. - - >>> df = sc.parallelize([([1, 2], {"key": "value"})]).toDF(["l", "d"]) - >>> df.select(df.l.getItem(0), df.d.getItem("key")).show() - +----+------+ - |l[0]|d[key]| - +----+------+ - | 1| value| - +----+------+ - >>> df.select(df.l[0], df.d["key"]).show() - +----+------+ - |l[0]|d[key]| - +----+------+ - | 1| value| - +----+------+ - """ - return self[key] - - def getField(self, name): - """An expression that gets a field by name in a StructField. 
- - >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(r=Row(a=1, b="b"))]).toDF() - >>> df.select(df.r.getField("b")).show() - +----+ - |r[b]| - +----+ - | b| - +----+ - >>> df.select(df.r.a).show() - +----+ - |r[a]| - +----+ - | 1| - +----+ - """ - return self[name] - - def __getattr__(self, item): - if item.startswith("__"): - raise AttributeError(item) - return self.getField(item) - - # string methods - rlike = _bin_op("rlike") - like = _bin_op("like") - startswith = _bin_op("startsWith") - endswith = _bin_op("endsWith") - - @ignore_unicode_prefix - def substr(self, startPos, length): - """ - Return a :class:`Column` which is a substring of the column - - :param startPos: start position (int or Column) - :param length: length of the substring (int or Column) - - >>> df.select(df.name.substr(1, 3).alias("col")).collect() - [Row(col=u'Ali'), Row(col=u'Bob')] - """ - if type(startPos) != type(length): - raise TypeError("Can not mix the type") - if isinstance(startPos, (int, long)): - jc = self._jc.substr(startPos, length) - elif isinstance(startPos, Column): - jc = self._jc.substr(startPos._jc, length._jc) - else: - raise TypeError("Unexpected type: %s" % type(startPos)) - return Column(jc) - - __getslice__ = substr - - @ignore_unicode_prefix - def inSet(self, *cols): - """ A boolean expression that is evaluated to true if the value of this - expression is contained by the evaluated values of the arguments. - - >>> df[df.name.inSet("Bob", "Mike")].collect() - [Row(age=5, name=u'Bob')] - >>> df[df.age.inSet([1, 2, 3])].collect() - [Row(age=2, name=u'Alice')] - """ - if len(cols) == 1 and isinstance(cols[0], (list, set)): - cols = cols[0] - cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols] - sc = SparkContext._active_spark_context - jc = getattr(self._jc, "in")(_to_seq(sc, cols)) - return Column(jc) - - # order - asc = _unary_op("asc", "Returns a sort expression based on the" - " ascending order of the given column name.") - desc = _unary_op("desc", "Returns a sort expression based on the" - " descending order of the given column name.") - - isNull = _unary_op("isNull", "True if the current expression is null.") - isNotNull = _unary_op("isNotNull", "True if the current expression is not null.") - - def alias(self, *alias): - """Returns this column aliased with a new name or names (in the case of expressions that - return more than one column, such as explode). 
- - >>> df.select(df.age.alias("age2")).collect() - [Row(age2=2), Row(age2=5)] - """ - - if len(alias) == 1: - return Column(getattr(self._jc, "as")(alias[0])) - else: - sc = SparkContext._active_spark_context - return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias)))) - - @ignore_unicode_prefix - def cast(self, dataType): - """ Convert the column into type `dataType` - - >>> df.select(df.age.cast("string").alias('ages')).collect() - [Row(ages=u'2'), Row(ages=u'5')] - >>> df.select(df.age.cast(StringType()).alias('ages')).collect() - [Row(ages=u'2'), Row(ages=u'5')] - """ - if isinstance(dataType, basestring): - jc = self._jc.cast(dataType) - elif isinstance(dataType, DataType): - sc = SparkContext._active_spark_context - ssql_ctx = sc._jvm.SQLContext(sc._jsc.sc()) - jdt = ssql_ctx.parseDataType(dataType.json()) - jc = self._jc.cast(jdt) - else: - raise TypeError("unexpected type: %s" % type(dataType)) - return Column(jc) - - @ignore_unicode_prefix - def between(self, lowerBound, upperBound): - """ A boolean expression that is evaluated to true if the value of this - expression is between the given columns. - """ - return (self >= lowerBound) & (self <= upperBound) - - @ignore_unicode_prefix - def when(self, condition, value): - """Evaluates a list of conditions and returns one of multiple possible result expressions. - If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. - - See :func:`pyspark.sql.functions.when` for example usage. - - :param condition: a boolean :class:`Column` expression. - :param value: a literal value, or a :class:`Column` expression. - - """ - sc = SparkContext._active_spark_context - if not isinstance(condition, Column): - raise TypeError("condition should be a Column") - v = value._jc if isinstance(value, Column) else value - jc = sc._jvm.functions.when(condition._jc, v) - return Column(jc) - - @ignore_unicode_prefix - def otherwise(self, value): - """Evaluates a list of conditions and returns one of multiple possible result expressions. - If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. - - See :func:`pyspark.sql.functions.when` for example usage. - - :param value: a literal value, or a :class:`Column` expression. - """ - v = value._jc if isinstance(value, Column) else value - jc = self._jc.otherwise(value) - return Column(jc) - - def __repr__(self): - return 'Column<%s>' % self._jc.toString().encode('utf8') - - class DataFrameNaFunctions(object): """Functionality for working with missing data in :class:`DataFrame`. 
""" @@ -1646,9 +1206,6 @@ def _test(): .toDF(StructType([StructField('age', IntegerType()), StructField('name', StringType())])) globs['df2'] = sc.parallelize([Row(name='Tom', height=80), Row(name='Bob', height=85)]).toDF() - globs['df3'] = sc.parallelize([Row(name='Alice', age=2, height=80), - Row(name='Bob', age=5, height=85)]).toDF() - globs['df4'] = sc.parallelize([Row(name='Alice', age=10, height=80), Row(name='Bob', age=5, height=None), Row(name='Tom', age=None, height=None), diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 6cd6974b0e5bb..8d0e766ecd3b4 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -27,7 +27,7 @@ from pyspark.rdd import _prepare_for_python_RDD, ignore_unicode_prefix from pyspark.serializers import PickleSerializer, AutoBatchedSerializer from pyspark.sql.types import StringType -from pyspark.sql.dataframe import Column, _to_java_column, _to_seq +from pyspark.sql.column import Column, _to_java_column, _to_seq __all__ = [ diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py new file mode 100644 index 0000000000000..9f7c743c051d3 --- /dev/null +++ b/python/pyspark/sql/group.py @@ -0,0 +1,183 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.rdd import ignore_unicode_prefix +from pyspark.sql.column import Column, _to_seq +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.types import * + +__all__ = ["GroupedData"] + + +def dfapi(f): + def _api(self): + name = f.__name__ + jdf = getattr(self._jdf, name)() + return DataFrame(jdf, self.sql_ctx) + _api.__name__ = f.__name__ + _api.__doc__ = f.__doc__ + return _api + + +def df_varargs_api(f): + def _api(self, *args): + name = f.__name__ + jdf = getattr(self._jdf, name)(_to_seq(self.sql_ctx._sc, args)) + return DataFrame(jdf, self.sql_ctx) + _api.__name__ = f.__name__ + _api.__doc__ = f.__doc__ + return _api + + +class GroupedData(object): + """ + A set of methods for aggregations on a :class:`DataFrame`, + created by :func:`DataFrame.groupBy`. + """ + + def __init__(self, jdf, sql_ctx): + self._jdf = jdf + self.sql_ctx = sql_ctx + + @ignore_unicode_prefix + def agg(self, *exprs): + """Compute aggregates and returns the result as a :class:`DataFrame`. + + The available aggregate functions are `avg`, `max`, `min`, `sum`, `count`. + + If ``exprs`` is a single :class:`dict` mapping from string to string, then the key + is the column to perform aggregation on, and the value is the aggregate function. + + Alternatively, ``exprs`` can also be a list of aggregate :class:`Column` expressions. + + :param exprs: a dict mapping from column name (string) to aggregate functions (string), + or a list of :class:`Column`. 
+ + >>> gdf = df.groupBy(df.name) + >>> gdf.agg({"*": "count"}).collect() + [Row(name=u'Alice', COUNT(1)=1), Row(name=u'Bob', COUNT(1)=1)] + + >>> from pyspark.sql import functions as F + >>> gdf.agg(F.min(df.age)).collect() + [Row(name=u'Alice', MIN(age)=2), Row(name=u'Bob', MIN(age)=5)] + """ + assert exprs, "exprs should not be empty" + if len(exprs) == 1 and isinstance(exprs[0], dict): + jdf = self._jdf.agg(exprs[0]) + else: + # Columns + assert all(isinstance(c, Column) for c in exprs), "all exprs should be Column" + jdf = self._jdf.agg(exprs[0]._jc, + _to_seq(self.sql_ctx._sc, [c._jc for c in exprs[1:]])) + return DataFrame(jdf, self.sql_ctx) + + @dfapi + def count(self): + """Counts the number of records for each group. + + >>> df.groupBy(df.age).count().collect() + [Row(age=2, count=1), Row(age=5, count=1)] + """ + + @df_varargs_api + def mean(self, *cols): + """Computes average values for each numeric columns for each group. + + :func:`mean` is an alias for :func:`avg`. + + :param cols: list of column names (string). Non-numeric columns are ignored. + + >>> df.groupBy().mean('age').collect() + [Row(AVG(age)=3.5)] + >>> df3.groupBy().mean('age', 'height').collect() + [Row(AVG(age)=3.5, AVG(height)=82.5)] + """ + + @df_varargs_api + def avg(self, *cols): + """Computes average values for each numeric columns for each group. + + :func:`mean` is an alias for :func:`avg`. + + :param cols: list of column names (string). Non-numeric columns are ignored. + + >>> df.groupBy().avg('age').collect() + [Row(AVG(age)=3.5)] + >>> df3.groupBy().avg('age', 'height').collect() + [Row(AVG(age)=3.5, AVG(height)=82.5)] + """ + + @df_varargs_api + def max(self, *cols): + """Computes the max value for each numeric columns for each group. + + >>> df.groupBy().max('age').collect() + [Row(MAX(age)=5)] + >>> df3.groupBy().max('age', 'height').collect() + [Row(MAX(age)=5, MAX(height)=85)] + """ + + @df_varargs_api + def min(self, *cols): + """Computes the min value for each numeric column for each group. + + :param cols: list of column names (string). Non-numeric columns are ignored. + + >>> df.groupBy().min('age').collect() + [Row(MIN(age)=2)] + >>> df3.groupBy().min('age', 'height').collect() + [Row(MIN(age)=2, MIN(height)=80)] + """ + + @df_varargs_api + def sum(self, *cols): + """Compute the sum for each numeric columns for each group. + + :param cols: list of column names (string). Non-numeric columns are ignored. 
+ + >>> df.groupBy().sum('age').collect() + [Row(SUM(age)=7)] + >>> df3.groupBy().sum('age', 'height').collect() + [Row(SUM(age)=7, SUM(height)=165)] + """ + + +def _test(): + import doctest + from pyspark.context import SparkContext + from pyspark.sql import Row, SQLContext + import pyspark.sql.group + globs = pyspark.sql.group.__dict__.copy() + sc = SparkContext('local[4]', 'PythonTest') + globs['sc'] = sc + globs['sqlContext'] = SQLContext(sc) + globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \ + .toDF(StructType([StructField('age', IntegerType()), + StructField('name', StringType())])) + globs['df3'] = sc.parallelize([Row(name='Alice', age=2, height=80), + Row(name='Bob', age=5, height=85)]).toDF() + + (failure_count, test_count) = doctest.testmod( + pyspark.sql.group, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + globs['sc'].stop() + if failure_count: + exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/run-tests b/python/run-tests index f2757a3967e81..ffde2fb24b369 100755 --- a/python/run-tests +++ b/python/run-tests @@ -72,7 +72,9 @@ function run_sql_tests() { echo "Run sql tests ..." run_test "pyspark/sql/_types.py" run_test "pyspark/sql/context.py" + run_test "pyspark/sql/column.py" run_test "pyspark/sql/dataframe.py" + run_test "pyspark/sql/group.py" run_test "pyspark/sql/functions.py" run_test "pyspark/sql/tests.py" } From deb411335a09b91eb1f75421d77e1c3686719621 Mon Sep 17 00:00:00 2001 From: AiHe <ai.he@ussuning.com> Date: Fri, 15 May 2015 20:42:35 -0700 Subject: [PATCH 204/320] [SPARK-7473] [MLLIB] Add reservoir sample in RandomForest reservoir feature sample by using existing api Author: AiHe <ai.he@ussuning.com> Closes #5988 from AiHe/reservoir and squashes the following commits: e7a41ac [AiHe] remove non-robust testing case 28ffb9a [AiHe] set seed as rng.nextLong 37459e1 [AiHe] set fixed seed 1e98a4c [AiHe] [MLLIB][tree] Add reservoir sample in RandomForest --- .../scala/org/apache/spark/mllib/tree/RandomForest.scala | 6 +++--- .../org/apache/spark/mllib/tree/RandomForestSuite.scala | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala index 055e60c7d9c95..b347c450c1aa8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala @@ -36,6 +36,7 @@ import org.apache.spark.mllib.tree.model._ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils +import org.apache.spark.util.random.SamplingUtils /** * :: Experimental :: @@ -473,9 +474,8 @@ object RandomForest extends Serializable with Logging { val (treeIndex, node) = nodeQueue.head // Choose subset of features for node (if subsampling). val featureSubset: Option[Array[Int]] = if (metadata.subsamplingFeatures) { - // TODO: Use more efficient subsampling? 
(use selection-and-rejection or reservoir) - Some(rng.shuffle(Range(0, metadata.numFeatures).toList) - .take(metadata.numFeaturesPerNode).toArray) + Some(SamplingUtils.reservoirSampleAndCount(Range(0, + metadata.numFeatures).iterator, metadata.numFeaturesPerNode, rng.nextLong)._1) } else { None } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala index ee3bc98486862..4ed66953cb628 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala @@ -196,7 +196,6 @@ class RandomForestSuite extends FunSuite with MLlibTestSparkContext { numClasses = 3, categoricalFeaturesInfo = categoricalFeaturesInfo) val model = RandomForest.trainClassifier(input, strategy, numTrees = 2, featureSubsetStrategy = "sqrt", seed = 12345) - EnsembleTestHelper.validateClassifier(model, arr, 1.0) } test("subsampling rate in RandomForest"){ From 578bfeeff514228f6fd4b07a536815fbb3510f7e Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Fri, 15 May 2015 22:00:31 -0700 Subject: [PATCH 205/320] [SPARK-7654][SQL] DataFrameReader and DataFrameWriter for input/output API This patch introduces DataFrameWriter and DataFrameReader. DataFrameReader interface, accessible through SQLContext.read, contains methods that create DataFrames. These methods used to reside in SQLContext. Example usage: ```scala sqlContext.read.json("...") sqlContext.read.parquet("...") ``` DataFrameWriter interface, accessible through DataFrame.write, implements a builder pattern to avoid the proliferation of options in writing DataFrame out. It currently implements: - mode - format (e.g. "parquet", "json") - options (generic options passed down into data sources) - partitionBy (partitioning columns) Example usage: ```scala df.write.mode("append").format("json").partitionBy("date").saveAsTable("myJsonTable") ``` TODO: - [ ] Documentation update - [ ] Move JDBC into reader / writer? - [ ] Deprecate the old interfaces - [ ] Move the generic load interface into reader. - [ ] Update example code and documentation Author: Reynold Xin <rxin@databricks.com> Closes #6175 from rxin/reader-writer and squashes the following commits: b146c95 [Reynold Xin] Deprecation of old APIs. bd8abdf [Reynold Xin] Fixed merge conflict. 26abea2 [Reynold Xin] Added general load methods. 244fbec [Reynold Xin] Added equivalent to example. 4f15d92 [Reynold Xin] Added documentation for partitionBy. 7e91611 [Reynold Xin] [SPARK-7654][SQL] DataFrameReader and DataFrameWriter for input/output API. 
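For reference, a minimal round-trip sketch of the builder-style API described above, assuming a hypothetical `sqlContext`, an existing DataFrame `df` with a `date` column, and placeholder paths; the builder methods shown (`format`, `mode`, `option`, `partitionBy`, `save`, `load`, `json`, `parquet`) are the ones introduced by this patch:

```scala
// Write df out as JSON, partitioned by its "date" column, appending to any existing output.
df.write.mode("append").format("json").partitionBy("date").save("/tmp/events-json")

// Read it back through the new reader interface; load() resolves the data
// source named by format() for the given path.
val events = sqlContext.read.format("json").load("/tmp/events-json")

// Format-specific shortcuts exist on both sides, and reader options such as
// samplingRatio feed into JSON schema inference.
df.write.parquet("/tmp/events-parquet")
val sampled = sqlContext.read.option("samplingRatio", "0.5").json("/tmp/events-json")
```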
--- .../spark/examples/sql/JavaSparkSQL.java | 4 +- .../spark/examples/mllib/DatasetExample.scala | 2 +- .../spark/examples/sql/RDDRelation.scala | 2 +- .../org/apache/spark/sql/DataFrame.scala | 172 +++------- .../apache/spark/sql/DataFrameReader.scala | 218 ++++++++++++ .../apache/spark/sql/DataFrameWriter.scala | 198 +++++++++++ .../org/apache/spark/sql/SQLContext.scala | 158 +++------ .../spark/sql/parquet/ParquetTest.scala | 8 +- .../spark/sql/sources/JavaSaveLoadSuite.java | 8 +- .../org/apache/spark/sql/DataFrameSuite.scala | 4 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 17 +- .../spark/sql/UserDefinedTypeSuite.scala | 4 +- .../org/apache/spark/sql/json/JsonSuite.scala | 50 +-- .../sql/parquet/ParquetFilterSuite.scala | 6 +- .../spark/sql/parquet/ParquetIOSuite.scala | 41 ++- .../ParquetPartitionDiscoverySuite.scala | 16 +- .../sources/CreateTableAsSelectSuite.scala | 2 +- .../spark/sql/sources/InsertSuite.scala | 10 +- .../spark/sql/sources/SaveLoadSuite.scala | 26 +- .../spark/sql/hive/HiveStrategies.scala | 4 +- .../spark/sql/hive/HiveParquetSuite.scala | 8 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 18 +- .../apache/spark/sql/hive/parquetSuites.scala | 16 +- .../sql/sources/hadoopFsRelationSuites.scala | 321 ++++++++---------- 24 files changed, 772 insertions(+), 541 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java index 8159ffbe2d269..173633ce059e3 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java @@ -99,7 +99,7 @@ public String call(Row row) { // Read in the parquet file created above. // Parquet files are self-describing so the schema is preserved. // The result of loading a parquet file is also a DataFrame. - DataFrame parquetFile = sqlContext.parquetFile("people.parquet"); + DataFrame parquetFile = sqlContext.read().parquet("people.parquet"); //Parquet files can also be registered as tables and then used in SQL statements. parquetFile.registerTempTable("parquetFile"); @@ -120,7 +120,7 @@ public String call(Row row) { // The path can be either a single text file or a directory storing text files. String path = "examples/src/main/resources/people.json"; // Create a DataFrame from the file(s) pointed by path - DataFrame peopleFromJsonFile = sqlContext.jsonFile(path); + DataFrame peopleFromJsonFile = sqlContext.read().json(path); // Because the schema of a JSON dataset is automatically inferred, to write queries, // it is better to take a look at what is the schema. 
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala index e943d6c889fab..c95cca7d656e8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala @@ -106,7 +106,7 @@ object DatasetExample { df.saveAsParquetFile(outputDir) println(s"Loading Parquet file with UDT from $outputDir.") - val newDataset = sqlContext.parquetFile(outputDir) + val newDataset = sqlContext.read.parquet(outputDir) println(s"Schema from Parquet: ${newDataset.schema.prettyJson}") val newFeatures = newDataset.select("features").map { case Row(v: Vector) => v } diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala index 6331d1c0060f8..acc89199d5849 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala @@ -61,7 +61,7 @@ object RDDRelation { df.saveAsParquetFile("pair.parquet") // Read in parquet file. Parquet files are self-describing so the schmema is preserved. - val parquetFile = sqlContext.parquetFile("pair.parquet") + val parquetFile = sqlContext.read.parquet("pair.parquet") // Queries can be run using the DSL on parequet files just like the original RDD. parquetFile.where($"key" === 1).select($"value".as("a")).collect().foreach(println) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 2e20c3d3f4ed2..55ef357a99f71 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -1289,6 +1289,16 @@ class DataFrame private[sql]( sqlContext.registerDataFrameAsTable(this, tableName) } + /** + * :: Experimental :: + * Interface for saving the content of the [[DataFrame]] out into external storage. + * + * @group output + * @since 1.4.0 + */ + @Experimental + def write: DataFrameWriter = new DataFrameWriter(this) + /** * Saves the contents of this [[DataFrame]] as a parquet file, preserving the schema. * Files that are written out using this method can be read back in as a [[DataFrame]] @@ -1296,16 +1306,16 @@ class DataFrame private[sql]( * @group output * @since 1.3.0 */ + @deprecated("Use write.parquet(path)", "1.4.0") def saveAsParquetFile(path: String): Unit = { if (sqlContext.conf.parquetUseDataSourceApi) { - save("org.apache.spark.sql.parquet", SaveMode.ErrorIfExists, Map("path" -> path)) + write.format("parquet").mode(SaveMode.ErrorIfExists).save(path) } else { sqlContext.executePlan(WriteToFile(path, logicalPlan)).toRdd } } /** - * :: Experimental :: * Creates a table from the the contents of this DataFrame. * It will use the default data source configured by spark.sql.sources.default. * This will fail if the table already exists. 
@@ -1320,13 +1330,12 @@ class DataFrame private[sql]( * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String): Unit = { - saveAsTable(tableName, SaveMode.ErrorIfExists) + write.mode(SaveMode.ErrorIfExists).saveAsTable(tableName) } /** - * :: Experimental :: * Creates a table from the the contents of this DataFrame, using the default data source * configured by spark.sql.sources.default and [[SaveMode.ErrorIfExists]] as the save mode. * @@ -1340,20 +1349,18 @@ class DataFrame private[sql]( * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.mode(mode).saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String, mode: SaveMode): Unit = { if (sqlContext.catalog.tableExists(Seq(tableName)) && mode == SaveMode.Append) { // If table already exists and the save mode is Append, // we will just call insertInto to append the contents of this DataFrame. insertInto(tableName, overwrite = false) } else { - val dataSourceName = sqlContext.conf.defaultDataSourceName - saveAsTable(tableName, dataSourceName, mode) + write.mode(mode).saveAsTable(tableName) } } /** - * :: Experimental :: * Creates a table at the given path from the the contents of this DataFrame * based on a given data source and a set of options, * using [[SaveMode.ErrorIfExists]] as the save mode. @@ -1368,9 +1375,9 @@ class DataFrame private[sql]( * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.format(source).saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String, source: String): Unit = { - saveAsTable(tableName, source, SaveMode.ErrorIfExists) + write.format(source).saveAsTable(tableName) } /** @@ -1388,13 +1395,12 @@ class DataFrame private[sql]( * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.format(source).mode(mode).saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = { - saveAsTable(tableName, source, mode, Map.empty[String, String]) + write.format(source).mode(mode).saveAsTable(tableName) } /** - * :: Experimental :: * Creates a table at the given path from the the contents of this DataFrame * based on a given data source, [[SaveMode]] specified by mode, and a set of options. * @@ -1408,40 +1414,17 @@ class DataFrame private[sql]( * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName)", + "1.4.0") def saveAsTable( tableName: String, source: String, mode: SaveMode, options: java.util.Map[String, String]): Unit = { - saveAsTable(tableName, source, mode, options.toMap) - } - - /** - * :: Experimental :: - * Creates a table at the given path from the the contents of this DataFrame - * based on a given data source, [[SaveMode]] specified by mode, a set of options, and a list of - * partition columns. - * - * Note that this currently only works with DataFrames that are created from a HiveContext as - * there is no notion of a persisted catalog in a standard SQL context. Instead you can write - * an RDD out to a parquet file, and then register that file as a table. This "table" can then - * be the target of an `insertInto`. 
- * @group output - * @since 1.4.0 - */ - @Experimental - def saveAsTable( - tableName: String, - source: String, - mode: SaveMode, - options: java.util.Map[String, String], - partitionColumns: java.util.List[String]): Unit = { - saveAsTable(tableName, source, mode, options.toMap, partitionColumns) + write.format(source).mode(mode).options(options).saveAsTable(tableName) } /** - * :: Experimental :: * (Scala-specific) * Creates a table from the the contents of this DataFrame based on a given data source, * [[SaveMode]] specified by mode, and a set of options. @@ -1456,167 +1439,88 @@ class DataFrame private[sql]( * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName)", + "1.4.0") def saveAsTable( tableName: String, source: String, mode: SaveMode, options: Map[String, String]): Unit = { - val cmd = - CreateTableUsingAsSelect( - tableName, - source, - temporary = false, - Array.empty[String], - mode, - options, - logicalPlan) - - sqlContext.executePlan(cmd).toRdd + write.format(source).mode(mode).options(options).saveAsTable(tableName) } /** - * :: Experimental :: - * Creates a table at the given path from the the contents of this DataFrame - * based on a given data source, [[SaveMode]] specified by mode, a set of options, and a list of - * partition columns. - * - * Note that this currently only works with DataFrames that are created from a HiveContext as - * there is no notion of a persisted catalog in a standard SQL context. Instead you can write - * an RDD out to a parquet file, and then register that file as a table. This "table" can then - * be the target of an `insertInto`. - * @group output - * @since 1.4.0 - */ - @Experimental - def saveAsTable( - tableName: String, - source: String, - mode: SaveMode, - options: Map[String, String], - partitionColumns: Seq[String]): Unit = { - sqlContext.executePlan( - CreateTableUsingAsSelect( - tableName, - source, - temporary = false, - partitionColumns.toArray, - mode, - options, - logicalPlan)).toRdd - } - - /** - * :: Experimental :: * Saves the contents of this DataFrame to the given path, * using the default data source configured by spark.sql.sources.default and * [[SaveMode.ErrorIfExists]] as the save mode. * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.save(path)", "1.4.0") def save(path: String): Unit = { - save(path, SaveMode.ErrorIfExists) + write.save(path) } /** - * :: Experimental :: * Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode, * using the default data source configured by spark.sql.sources.default. * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.mode(mode).save(path)", "1.4.0") def save(path: String, mode: SaveMode): Unit = { - val dataSourceName = sqlContext.conf.defaultDataSourceName - save(path, dataSourceName, mode) + write.mode(mode).save(path) } /** - * :: Experimental :: * Saves the contents of this DataFrame to the given path based on the given data source, * using [[SaveMode.ErrorIfExists]] as the save mode. * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.format(source).save(path)", "1.4.0") def save(path: String, source: String): Unit = { - save(source, SaveMode.ErrorIfExists, Map("path" -> path)) + write.format(source).save(path) } /** - * :: Experimental :: * Saves the contents of this DataFrame to the given path based on the given data source and * [[SaveMode]] specified by mode. 
* @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.format(source).mode(mode).save(path)", "1.4.0") def save(path: String, source: String, mode: SaveMode): Unit = { - save(source, mode, Map("path" -> path)) + write.format(source).mode(mode).save(path) } /** - * :: Experimental :: * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options. * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.format(source).mode(mode).options(options).save()", "1.4.0") def save( source: String, mode: SaveMode, options: java.util.Map[String, String]): Unit = { - save(source, mode, options.toMap) + write.format(source).mode(mode).options(options).save() } /** - * :: Experimental :: - * Saves the contents of this DataFrame to the given path based on the given data source, - * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. - * @group output - * @since 1.4.0 - */ - @Experimental - def save( - source: String, - mode: SaveMode, - options: java.util.Map[String, String], - partitionColumns: java.util.List[String]): Unit = { - save(source, mode, options.toMap, partitionColumns) - } - - /** - * :: Experimental :: * (Scala-specific) * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options * @group output * @since 1.3.0 */ - @Experimental + @deprecated("Use write.format(source).mode(mode).options(options).save()", "1.4.0") def save( source: String, mode: SaveMode, options: Map[String, String]): Unit = { - ResolvedDataSource(sqlContext, source, Array.empty[String], mode, options, this) - } - - /** - * :: Experimental :: - * Saves the contents of this DataFrame to the given path based on the given data source, - * [[SaveMode]] specified by mode, and partition columns specified by `partitionColumns`. - * @group output - * @since 1.4.0 - */ - @Experimental - def save( - source: String, - mode: SaveMode, - options: Map[String, String], - partitionColumns: Seq[String]): Unit = { - ResolvedDataSource(sqlContext, source, partitionColumns.toArray, mode, options, this) + write.format(source).mode(mode).options(options).save() } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala new file mode 100644 index 0000000000000..4d63faad6fb7c --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -0,0 +1,218 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.spark.sql + +import org.apache.hadoop.fs.Path + +import org.apache.spark.annotation.Experimental +import org.apache.spark.api.java.JavaRDD +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.json.{JsonRDD, JSONRelation} +import org.apache.spark.sql.parquet.ParquetRelation2 +import org.apache.spark.sql.sources.{LogicalRelation, ResolvedDataSource} +import org.apache.spark.sql.types.StructType + +/** + * :: Experimental :: + * Interface used to load a [[DataFrame]] from external storage systems (e.g. file systems, + * key-value stores, etc). + * + * @since 1.4.0 + */ +@Experimental +class DataFrameReader private[sql](sqlContext: SQLContext) { + + /** + * Specifies the input data source format. + * + * @since 1.4.0 + */ + def format(source: String): DataFrameReader = { + this.source = source + this + } + + /** + * Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema + * automatically from data. By specifying the schema here, the underlying data source can + * skip the schema inference step, and thus speed up data loading. + * + * @since 1.4.0 + */ + def schema(schema: StructType): DataFrameReader = { + this.userSpecifiedSchema = Option(schema) + this + } + + /** + * Adds an input option for the underlying data source. + * + * @since 1.4.0 + */ + def option(key: String, value: String): DataFrameReader = { + this.extraOptions += (key -> value) + this + } + + /** + * (Scala-specific) Adds input options for the underlying data source. + * + * @since 1.4.0 + */ + def options(options: scala.collection.Map[String, String]): DataFrameReader = { + this.extraOptions ++= options + this + } + + /** + * Adds input options for the underlying data source. + * + * @since 1.4.0 + */ + def options(options: java.util.Map[String, String]): DataFrameReader = { + this.options(scala.collection.JavaConversions.mapAsScalaMap(options)) + this + } + + /** + * Specifies the input partitioning. If specified, the underlying data source does not need to + * discover the data partitioning scheme, and thus can speed up very large inputs. + * + * @since 1.4.0 + */ + @scala.annotation.varargs + def partitionBy(colNames: String*): DataFrameReader = { + this.partitioningColumns = Option(colNames) + this + } + + /** + * Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by + * a local or distributed file system). + * + * @since 1.4.0 + */ + def load(path: String): DataFrame = { + option("path", path).load() + } + + /** + * Loads input in as a [[DataFrame]], for data sources that don't require a path (e.g. external + * key-value stores). + * + * @since 1.4.0 + */ + def load(): DataFrame = { + val resolved = ResolvedDataSource( + sqlContext, + userSpecifiedSchema = userSpecifiedSchema, + partitionColumns = partitioningColumns.map(_.toArray).getOrElse(Array.empty[String]), + provider = source, + options = extraOptions.toMap) + DataFrame(sqlContext, LogicalRelation(resolved.relation)) + } + + /** + * Loads a JSON file (one object per line) and returns the result as a [[DataFrame]]. + * + * This function goes through the input once to determine the input schema. If you know the + * schema in advance, use the version that specifies the schema to avoid the extra scan. 
+ * + * @param path input path + * @since 1.4.0 + */ + def json(path: String): DataFrame = format("json").load(path) + + /** + * Loads an `JavaRDD[String]` storing JSON objects (one object per record) and + * returns the result as a [[DataFrame]]. + * + * Unless the schema is specified using [[schema]] function, this function goes through the + * input once to determine the input schema. + * + * @param jsonRDD input RDD with one JSON object per record + * @since 1.4.0 + */ + def json(jsonRDD: JavaRDD[String]): DataFrame = json(jsonRDD.rdd) + + /** + * Loads an `RDD[String]` storing JSON objects (one object per record) and + * returns the result as a [[DataFrame]]. + * + * Unless the schema is specified using [[schema]] function, this function goes through the + * input once to determine the input schema. + * + * @param jsonRDD input RDD with one JSON object per record + * @since 1.4.0 + */ + def json(jsonRDD: RDD[String]): DataFrame = { + val samplingRatio = extraOptions.getOrElse("samplingRatio", "1.0").toDouble + if (sqlContext.conf.useJacksonStreamingAPI) { + sqlContext.baseRelationToDataFrame( + new JSONRelation(() => jsonRDD, None, samplingRatio, userSpecifiedSchema)(sqlContext)) + } else { + val columnNameOfCorruptJsonRecord = sqlContext.conf.columnNameOfCorruptRecord + val appliedSchema = userSpecifiedSchema.getOrElse( + JsonRDD.nullTypeToStringType( + JsonRDD.inferSchema(jsonRDD, 1.0, columnNameOfCorruptJsonRecord))) + val rowRDD = JsonRDD.jsonStringToRow(jsonRDD, appliedSchema, columnNameOfCorruptJsonRecord) + sqlContext.createDataFrame(rowRDD, appliedSchema, needsConversion = false) + } + } + + /** + * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty + * [[DataFrame]] if no paths are passed in. + * + * @since 1.4.0 + */ + @scala.annotation.varargs + def parquet(paths: String*): DataFrame = { + if (paths.isEmpty) { + sqlContext.emptyDataFrame + } else { + val globbedPaths = paths.map(new Path(_)).flatMap(SparkHadoopUtil.get.globPath).toArray + sqlContext.baseRelationToDataFrame( + new ParquetRelation2( + globbedPaths.map(_.toString), None, None, Map.empty[String, String])(sqlContext)) + } + } + + /** + * Returns the specified table as a [[DataFrame]]. + * + * @since 1.4.0 + */ + def table(tableName: String): DataFrame = { + DataFrame(sqlContext, sqlContext.catalog.lookupRelation(Seq(tableName))) + } + + /////////////////////////////////////////////////////////////////////////////////////// + // Builder pattern config options + /////////////////////////////////////////////////////////////////////////////////////// + + private var source: String = sqlContext.conf.defaultDataSourceName + + private var userSpecifiedSchema: Option[StructType] = None + + private var extraOptions = new scala.collection.mutable.HashMap[String, String] + + private var partitioningColumns: Option[Seq[String]] = None + +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala new file mode 100644 index 0000000000000..b1fc18ac3cb54 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -0,0 +1,198 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. 
+* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.spark.sql + +import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.sources.{ResolvedDataSource, CreateTableUsingAsSelect} + + +/** + * :: Experimental :: + * Interface used to write a [[DataFrame]] to external storage systems (e.g. file systems, + * key-value stores, etc). + * + * @since 1.4.0 + */ +@Experimental +final class DataFrameWriter private[sql](df: DataFrame) { + + /** + * Specifies the behavior when data or table already exists. Options include: + * - `SaveMode.Overwrite`: overwrite the existing data. + * - `SaveMode.Append`: append the data. + * - `SaveMode.Ignore`: ignore the operation (i.e. no-op). + * - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. + * + * @since 1.4.0 + */ + def mode(saveMode: SaveMode): DataFrameWriter = { + this.mode = saveMode + this + } + + /** + * Specifies the behavior when data or table already exists. Options include: + * - `overwrite`: overwrite the existing data. + * - `append`: append the data. + * - `ignore`: ignore the operation (i.e. no-op). + * - `error`: default option, throw an exception at runtime. + * + * @since 1.4.0 + */ + def mode(saveMode: String): DataFrameWriter = { + saveMode.toLowerCase match { + case "overwrite" => SaveMode.Overwrite + case "append" => SaveMode.Append + case "ignore" => SaveMode.Ignore + case "error" | "default" => SaveMode.ErrorIfExists + case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. " + + "Accepted modes are 'overwrite', 'append', 'ignore', 'error'.") + } + this + } + + /** + * Specifies the underlying output data source. Built-in options include "parquet", "json", etc. + * + * @since 1.4.0 + */ + def format(source: String): DataFrameWriter = { + this.source = source + this + } + + /** + * Adds an output option for the underlying data source. + * + * @since 1.4.0 + */ + def option(key: String, value: String): DataFrameWriter = { + this.extraOptions += (key -> value) + this + } + + /** + * (Scala-specific) Adds output options for the underlying data source. + * + * @since 1.4.0 + */ + def options(options: scala.collection.Map[String, String]): DataFrameWriter = { + this.extraOptions ++= options + this + } + + /** + * Adds output options for the underlying data source. + * + * @since 1.4.0 + */ + def options(options: java.util.Map[String, String]): DataFrameWriter = { + this.options(scala.collection.JavaConversions.mapAsScalaMap(options)) + this + } + + /** + * Partitions the output by the given columns on the file system. If specified, the output is + * laid out on the file system similar to Hive's partitioning scheme. + * + * @since 1.4.0 + */ + @scala.annotation.varargs + def partitionBy(colNames: String*): DataFrameWriter = { + this.partitioningColumns = Option(colNames) + this + } + + /** + * Saves the content of the [[DataFrame]] at the specified path. 
+ * + * @since 1.4.0 + */ + def save(path: String): Unit = { + this.extraOptions += ("path" -> path) + save() + } + + /** + * Saves the content of the [[DataFrame]] using the configured data source, mode, and options. + * + * @since 1.4.0 + */ + def save(): Unit = { + ResolvedDataSource( + df.sqlContext, + source, + partitioningColumns.map(_.toArray).getOrElse(Array.empty[String]), + mode, + extraOptions.toMap, + df) + } + + /** + * Saves the content of the [[DataFrame]] as the specified table. + * + * @since 1.4.0 + */ + def saveAsTable(tableName: String): Unit = { + val cmd = + CreateTableUsingAsSelect( + tableName, + source, + temporary = false, + partitioningColumns.map(_.toArray).getOrElse(Array.empty[String]), + mode, + extraOptions.toMap, + df.logicalPlan) + df.sqlContext.executePlan(cmd).toRdd + } + + /** + * Saves the content of the [[DataFrame]] in JSON format at the specified path. + * This is equivalent to: + * {{{ + * format("json").save(path) + * }}} + * + * @since 1.4.0 + */ + def json(path: String): Unit = format("json").save(path) + + /** + * Saves the content of the [[DataFrame]] in Parquet format at the specified path. + * This is equivalent to: + * {{{ + * format("parquet").save(path) + * }}} + * + * @since 1.4.0 + */ + def parquet(path: String): Unit = format("parquet").save(path) + + /////////////////////////////////////////////////////////////////////////////////////// + // Builder pattern config options + /////////////////////////////////////////////////////////////////////////////////////// + + private var source: String = df.sqlContext.conf.defaultDataSourceName + + private var mode: SaveMode = SaveMode.ErrorIfExists + + private var extraOptions = new scala.collection.mutable.HashMap[String, String] + + private var partitioningColumns: Option[Seq[String]] = None + +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 9fb355eb81939..34a50e522c4ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -27,11 +27,9 @@ import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal import com.google.common.reflect.TypeToken -import org.apache.hadoop.fs.Path import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} -import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.analysis._ @@ -43,8 +41,6 @@ import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.ParserDialect import org.apache.spark.sql.execution.{Filter, _} import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} -import org.apache.spark.sql.json._ -import org.apache.spark.sql.parquet.ParquetRelation2 import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -596,6 +592,20 @@ class SQLContext(@transient val sparkContext: SparkContext) createDataFrame(rdd, beanClass) } + /** + * :: Experimental :: + * Returns a [[DataFrameReader]] that can be used to read data in as a [[DataFrame]].
+ * {{{ + * sqlContext.read.parquet("/path/to/file.parquet") + * sqlContext.read.schema(schema).json("/path/to/file.json") + * }}} + * + * @group genericdata + * @since 1.4.0 + */ + @Experimental + def read: DataFrameReader = new DataFrameReader(this) + /** * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty * [[DataFrame]] if no paths are passed in. @@ -603,15 +613,13 @@ class SQLContext(@transient val sparkContext: SparkContext) * @group specificdata * @since 1.3.0 */ + @deprecated("Use read.parquet()", "1.4.0") @scala.annotation.varargs def parquetFile(paths: String*): DataFrame = { if (paths.isEmpty) { emptyDataFrame } else if (conf.parquetUseDataSourceApi) { - val globbedPaths = paths.map(new Path(_)).flatMap(SparkHadoopUtil.get.globPath).toArray - baseRelationToDataFrame( - new ParquetRelation2( - globbedPaths.map(_.toString), None, None, Map.empty[String, String])(this)) + read.parquet(paths : _*) } else { DataFrame(this, parquet.ParquetRelation( paths.mkString(","), Some(sparkContext.hadoopConfiguration), this)) @@ -625,28 +633,31 @@ class SQLContext(@transient val sparkContext: SparkContext) * @group specificdata * @since 1.3.0 */ - def jsonFile(path: String): DataFrame = jsonFile(path, 1.0) + @deprecated("Use read.json()", "1.4.0") + def jsonFile(path: String): DataFrame = { + read.json(path) + } /** - * :: Experimental :: * Loads a JSON file (one object per line) and applies the given schema, * returning the result as a [[DataFrame]]. * * @group specificdata * @since 1.3.0 */ - @Experimental - def jsonFile(path: String, schema: StructType): DataFrame = - load("json", schema, Map("path" -> path)) + @deprecated("Use read.json()", "1.4.0") + def jsonFile(path: String, schema: StructType): DataFrame = { + read.schema(schema).json(path) + } /** - * :: Experimental :: * @group specificdata * @since 1.3.0 */ - @Experimental - def jsonFile(path: String, samplingRatio: Double): DataFrame = - load("json", Map("path" -> path, "samplingRatio" -> samplingRatio.toString)) + @deprecated("Use read.json()", "1.4.0") + def jsonFile(path: String, samplingRatio: Double): DataFrame = { + read.option("samplingRatio", samplingRatio.toString).json(path) + } /** * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a @@ -656,8 +667,8 @@ class SQLContext(@transient val sparkContext: SparkContext) * @group specificdata * @since 1.3.0 */ - def jsonRDD(json: RDD[String]): DataFrame = jsonRDD(json, 1.0) - + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: RDD[String]): DataFrame = read.json(json) /** * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a @@ -667,196 +678,131 @@ class SQLContext(@transient val sparkContext: SparkContext) * @group specificdata * @since 1.3.0 */ - def jsonRDD(json: JavaRDD[String]): DataFrame = jsonRDD(json.rdd, 1.0) + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json) /** - * :: Experimental :: * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, * returning the result as a [[DataFrame]]. 
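A minimal sketch of the replacement call named in the deprecation messages above, assuming an `RDD[String]` of JSON records called `jsonRecords` and a `StructType` called `schema`:
{{{
// Previously: sqlContext.jsonRDD(jsonRecords, schema)
val df = sqlContext.read.schema(schema).json(jsonRecords)
}}}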
* * @group specificdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.json()", "1.4.0") def jsonRDD(json: RDD[String], schema: StructType): DataFrame = { - if (conf.useJacksonStreamingAPI) { - baseRelationToDataFrame(new JSONRelation(() => json, None, 1.0, Some(schema))(this)) - } else { - val columnNameOfCorruptJsonRecord = conf.columnNameOfCorruptRecord - val appliedSchema = - Option(schema).getOrElse( - JsonRDD.nullTypeToStringType( - JsonRDD.inferSchema(json, 1.0, columnNameOfCorruptJsonRecord))) - val rowRDD = JsonRDD.jsonStringToRow(json, appliedSchema, columnNameOfCorruptJsonRecord) - createDataFrame(rowRDD, appliedSchema, needsConversion = false) - } + read.schema(schema).json(json) } /** - * :: Experimental :: * Loads an JavaRDD<String> storing JSON objects (one object per record) and applies the given * schema, returning the result as a [[DataFrame]]. * * @group specificdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.json()", "1.4.0") def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = { - jsonRDD(json.rdd, schema) + read.schema(schema).json(json) } /** - * :: Experimental :: * Loads an RDD[String] storing JSON objects (one object per record) inferring the * schema, returning the result as a [[DataFrame]]. * * @group specificdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.json()", "1.4.0") def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = { - if (conf.useJacksonStreamingAPI) { - baseRelationToDataFrame(new JSONRelation(() => json, None, samplingRatio, None)(this)) - } else { - val columnNameOfCorruptJsonRecord = conf.columnNameOfCorruptRecord - val appliedSchema = - JsonRDD.nullTypeToStringType( - JsonRDD.inferSchema(json, samplingRatio, columnNameOfCorruptJsonRecord)) - val rowRDD = JsonRDD.jsonStringToRow(json, appliedSchema, columnNameOfCorruptJsonRecord) - createDataFrame(rowRDD, appliedSchema, needsConversion = false) - } + read.option("samplingRatio", samplingRatio.toString).json(json) } /** - * :: Experimental :: * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the * schema, returning the result as a [[DataFrame]]. * * @group specificdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.json()", "1.4.0") def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = { - jsonRDD(json.rdd, samplingRatio); + read.option("samplingRatio", samplingRatio.toString).json(json) } /** - * :: Experimental :: * Returns the dataset stored at path as a DataFrame, * using the default data source configured by spark.sql.sources.default. * * @group genericdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.load(path)", "1.4.0") def load(path: String): DataFrame = { - val dataSourceName = conf.defaultDataSourceName - load(path, dataSourceName) + read.load(path) } /** - * :: Experimental :: * Returns the dataset stored at path as a DataFrame, using the given data source. * * @group genericdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.format(source).load(path)", "1.4.0") def load(path: String, source: String): DataFrame = { - load(source, Map("path" -> path)) + read.format(source).load(path) } /** - * :: Experimental :: * (Java-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame. 
* * @group genericdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.format(source).options(options).load()", "1.4.0") def load(source: String, options: java.util.Map[String, String]): DataFrame = { - load(source, options.toMap) + read.options(options).format(source).load() } /** - * :: Experimental :: * (Scala-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame. * * @group genericdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.format(source).options(options).load()", "1.4.0") def load(source: String, options: Map[String, String]): DataFrame = { - val resolved = ResolvedDataSource(this, None, Array.empty[String], source, options) - DataFrame(this, LogicalRelation(resolved.relation)) - } - - /** - * :: Experimental :: - * (Java-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. - * - * @group genericdata - * @since 1.3.0 - */ - @Experimental - def load( - source: String, - schema: StructType, - options: java.util.Map[String, String]): DataFrame = { - load(source, schema, options.toMap) + read.options(options).format(source).load() } /** - * :: Experimental :: * (Java-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. * * @group genericdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0") def load( source: String, schema: StructType, - partitionColumns: Array[String], options: java.util.Map[String, String]): DataFrame = { - load(source, schema, partitionColumns, options.toMap) - } - - /** - * :: Experimental :: - * (Scala-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. - * @group genericdata - * @since 1.3.0 - */ - @Experimental - def load( - source: String, - schema: StructType, - options: Map[String, String]): DataFrame = { - val resolved = ResolvedDataSource(this, Some(schema), Array.empty[String], source, options) - DataFrame(this, LogicalRelation(resolved.relation)) + read.format(source).schema(schema).options(options).load() } /** - * :: Experimental :: * (Scala-specific) Returns the dataset specified by the given data source and * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. 
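A minimal sketch of the equivalent reader call, assuming `source`, `schema`, and `options` hold whatever values a caller previously passed to `load`:
{{{
// Previously: sqlContext.load(source, schema, options)
val df = sqlContext.read.format(source).schema(schema).options(options).load()
}}}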
* @group genericdata * @since 1.3.0 */ - @Experimental + @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0") def load( source: String, schema: StructType, - partitionColumns: Array[String], options: Map[String, String]): DataFrame = { - val resolved = ResolvedDataSource(this, Some(schema), partitionColumns, source, options) - DataFrame(this, LogicalRelation(resolved.relation)) + read.format(source).schema(schema).options(options).load() } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala index 9d17516e0ef7d..7a73b6f1ac601 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala @@ -90,7 +90,7 @@ private[sql] trait ParquetTest { (data: Seq[T]) (f: String => Unit): Unit = { withTempPath { file => - sparkContext.parallelize(data).toDF().saveAsParquetFile(file.getCanonicalPath) + sparkContext.parallelize(data).toDF().write.parquet(file.getCanonicalPath) f(file.getCanonicalPath) } } @@ -102,7 +102,7 @@ private[sql] trait ParquetTest { protected def withParquetDataFrame[T <: Product: ClassTag: TypeTag] (data: Seq[T]) (f: DataFrame => Unit): Unit = { - withParquetFile(data)(path => f(sqlContext.parquetFile(path))) + withParquetFile(data)(path => f(sqlContext.read.parquet(path))) } /** @@ -128,12 +128,12 @@ private[sql] trait ParquetTest { protected def makeParquetFile[T <: Product: ClassTag: TypeTag]( data: Seq[T], path: File): Unit = { - data.toDF().save(path.getCanonicalPath, "org.apache.spark.sql.parquet", SaveMode.Overwrite) + data.toDF().write.mode(SaveMode.Overwrite).parquet(path.getCanonicalPath) } protected def makeParquetFile[T <: Product: ClassTag: TypeTag]( df: DataFrame, path: File): Unit = { - df.save(path.getCanonicalPath, "org.apache.spark.sql.parquet", SaveMode.Overwrite) + df.write.mode(SaveMode.Overwrite).parquet(path.getCanonicalPath) } protected def makePartitionDir( diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java index b76f7d421f643..6a0bcefe7aa88 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java @@ -75,9 +75,9 @@ public void setUp() throws IOException { public void saveAndLoad() { Map<String, String> options = new HashMap<String, String>(); options.put("path", path.toString()); - df.save("org.apache.spark.sql.json", SaveMode.ErrorIfExists, options); + df.save("json", SaveMode.ErrorIfExists, options); - DataFrame loadedDF = sqlContext.load("org.apache.spark.sql.json", options); + DataFrame loadedDF = sqlContext.read().format("json").options(options).load(); checkAnswer(loadedDF, df.collectAsList()); } @@ -86,12 +86,12 @@ public void saveAndLoad() { public void saveAndLoadWithSchema() { Map<String, String> options = new HashMap<String, String>(); options.put("path", path.toString()); - df.save("org.apache.spark.sql.json", SaveMode.ErrorIfExists, options); + df.save("json", SaveMode.ErrorIfExists, options); List<StructField> fields = new ArrayList<StructField>(); fields.add(DataTypes.createStructField("b", DataTypes.StringType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame loadedDF = sqlContext.load("org.apache.spark.sql.json", schema, options); + 
DataFrame loadedDF = sqlContext.load("json", schema, options); checkAnswer(loadedDF, sqlContext.sql("SELECT b FROM jsonTable").collectAsList()); } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 1d5f6b3aad6fd..054b23dba84c5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -460,14 +460,14 @@ class DataFrameSuite extends QueryTest { } test("SPARK-7551: support backticks for DataFrame attribute resolution") { - val df = TestSQLContext.jsonRDD(TestSQLContext.sparkContext.makeRDD( + val df = TestSQLContext.read.json(TestSQLContext.sparkContext.makeRDD( """{"a.b": {"c": {"d..e": {"f": 1}}}}""" :: Nil)) checkAnswer( df.select(df("`a.b`.c.`d..e`.`f`")), Row(1) ) - val df2 = TestSQLContext.jsonRDD(TestSQLContext.sparkContext.makeRDD( + val df2 = TestSQLContext.read.json(TestSQLContext.sparkContext.makeRDD( """{"a b": {"c": {"d e": {"f": 1}}}}""" :: Nil)) checkAnswer( df2.select(df2("`a b`.c.d e.f")), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 479ad9fe621d0..c5c4f448a7224 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -105,7 +105,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("grouping on nested fields") { - jsonRDD(sparkContext.parallelize("""{"nested": {"attribute": 1}, "value": 2}""" :: Nil)) + read.json(sparkContext.parallelize("""{"nested": {"attribute": 1}, "value": 2}""" :: Nil)) .registerTempTable("rows") checkAnswer( @@ -122,7 +122,8 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("SPARK-6201 IN type conversion") { - jsonRDD(sparkContext.parallelize(Seq("{\"a\": \"1\"}}", "{\"a\": \"2\"}}", "{\"a\": \"3\"}}"))) + read.json( + sparkContext.parallelize(Seq("{\"a\": \"1\"}}", "{\"a\": \"2\"}}", "{\"a\": \"3\"}}"))) .registerTempTable("d") checkAnswer( @@ -1199,7 +1200,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { test("SPARK-3483 Special chars in column names") { val data = sparkContext.parallelize( Seq("""{"key?number1": "value1", "key.number2": "value2"}""")) - jsonRDD(data).registerTempTable("records") + read.json(data).registerTempTable("records") sql("SELECT `key?number1`, `key.number2` FROM records") } @@ -1240,11 +1241,11 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("SPARK-4322 Grouping field with struct field as sub expression") { - jsonRDD(sparkContext.makeRDD("""{"a": {"b": [{"c": 1}]}}""" :: Nil)).registerTempTable("data") + read.json(sparkContext.makeRDD("""{"a": {"b": [{"c": 1}]}}""" :: Nil)).registerTempTable("data") checkAnswer(sql("SELECT a.b[0].c FROM data GROUP BY a.b[0].c"), Row(1)) dropTempTable("data") - jsonRDD(sparkContext.makeRDD("""{"a": {"b": 1}}""" :: Nil)).registerTempTable("data") + read.json(sparkContext.makeRDD("""{"a": {"b": 1}}""" :: Nil)).registerTempTable("data") checkAnswer(sql("SELECT a.b + 1 FROM data GROUP BY a.b + 1"), Row(2)) dropTempTable("data") } @@ -1292,7 +1293,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("SPARK-6145: ORDER BY test for nested fields") { - jsonRDD(sparkContext.makeRDD("""{"a": {"b": 1, "a": {"a": 1}}, "c": [{"d": 1}]}""" :: Nil)) + read.json(sparkContext.makeRDD("""{"a": {"b": 1, "a": 
{"a": 1}}, "c": [{"d": 1}]}""" :: Nil)) .registerTempTable("nestedOrder") checkAnswer(sql("SELECT 1 FROM nestedOrder ORDER BY a.b"), Row(1)) @@ -1304,14 +1305,14 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("SPARK-6145: special cases") { - jsonRDD(sparkContext.makeRDD( + read.json(sparkContext.makeRDD( """{"a": {"b": [1]}, "b": [{"a": 1}], "c0": {"a": 1}}""" :: Nil)).registerTempTable("t") checkAnswer(sql("SELECT a.b[0] FROM t ORDER BY c0.a"), Row(1)) checkAnswer(sql("SELECT b[0].a FROM t ORDER BY c0.a"), Row(1)) } test("SPARK-6898: complete support for special chars in column names") { - jsonRDD(sparkContext.makeRDD( + read.json(sparkContext.makeRDD( """{"a": {"c.b": 1}, "b.$q": [{"a@!.q": 1}], "q.w": {"w.i&": [1]}}""" :: Nil)) .registerTempTable("t") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index 2672e20deadc5..dc2d43a197f40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -105,13 +105,13 @@ class UserDefinedTypeSuite extends QueryTest { test("UDTs with Parquet") { val tempDir = Utils.createTempDir() tempDir.delete() - pointsRDD.saveAsParquetFile(tempDir.getCanonicalPath) + pointsRDD.write.parquet(tempDir.getCanonicalPath) } test("Repartition UDTs with Parquet") { val tempDir = Utils.createTempDir() tempDir.delete() - pointsRDD.repartition(1).saveAsParquetFile(tempDir.getCanonicalPath) + pointsRDD.repartition(1).write.parquet(tempDir.getCanonicalPath) } // Tests to make sure that all operators correctly convert types on the way out. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala index b06e3385980f7..6f747e5846f74 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala @@ -215,7 +215,7 @@ class JsonSuite extends QueryTest { } test("Complex field and type inferring with null in sampling") { - val jsonDF = jsonRDD(jsonNullStruct) + val jsonDF = read.json(jsonNullStruct) val expectedSchema = StructType( StructField("headers", StructType( StructField("Charset", StringType, true) :: @@ -234,7 +234,7 @@ class JsonSuite extends QueryTest { } test("Primitive field and type inferring") { - val jsonDF = jsonRDD(primitiveFieldAndType) + val jsonDF = read.json(primitiveFieldAndType) val expectedSchema = StructType( StructField("bigInteger", DecimalType.Unlimited, true) :: @@ -262,7 +262,7 @@ class JsonSuite extends QueryTest { } test("Complex field and type inferring") { - val jsonDF = jsonRDD(complexFieldAndType1) + val jsonDF = read.json(complexFieldAndType1) val expectedSchema = StructType( StructField("arrayOfArray1", ArrayType(ArrayType(StringType, true), true), true) :: @@ -361,7 +361,7 @@ class JsonSuite extends QueryTest { } test("GetField operation on complex data type") { - val jsonDF = jsonRDD(complexFieldAndType1) + val jsonDF = read.json(complexFieldAndType1) jsonDF.registerTempTable("jsonTable") checkAnswer( @@ -377,7 +377,7 @@ class JsonSuite extends QueryTest { } test("Type conflict in primitive field values") { - val jsonDF = jsonRDD(primitiveFieldValueTypeConflict) + val jsonDF = read.json(primitiveFieldValueTypeConflict) val expectedSchema = StructType( StructField("num_bool", StringType, true) :: @@ -451,7 +451,7 @@ class JsonSuite 
extends QueryTest { } ignore("Type conflict in primitive field values (Ignored)") { - val jsonDF = jsonRDD(primitiveFieldValueTypeConflict) + val jsonDF = read.json(primitiveFieldValueTypeConflict) jsonDF.registerTempTable("jsonTable") // Right now, the analyzer does not promote strings in a boolean expression. @@ -504,7 +504,7 @@ class JsonSuite extends QueryTest { } test("Type conflict in complex field values") { - val jsonDF = jsonRDD(complexFieldValueTypeConflict) + val jsonDF = read.json(complexFieldValueTypeConflict) val expectedSchema = StructType( StructField("array", ArrayType(LongType, true), true) :: @@ -528,7 +528,7 @@ class JsonSuite extends QueryTest { } test("Type conflict in array elements") { - val jsonDF = jsonRDD(arrayElementTypeConflict) + val jsonDF = read.json(arrayElementTypeConflict) val expectedSchema = StructType( StructField("array1", ArrayType(StringType, true), true) :: @@ -556,7 +556,7 @@ class JsonSuite extends QueryTest { } test("Handling missing fields") { - val jsonDF = jsonRDD(missingFields) + val jsonDF = read.json(missingFields) val expectedSchema = StructType( StructField("a", BooleanType, true) :: @@ -576,7 +576,7 @@ class JsonSuite extends QueryTest { dir.delete() val path = dir.getCanonicalPath sparkContext.parallelize(1 to 100).map(i => s"""{"a": 1, "b": "str$i"}""").saveAsTextFile(path) - val jsonDF = jsonFile(path, 0.49) + val jsonDF = read.option("samplingRatio", "0.49").json(path) val analyzed = jsonDF.queryExecution.analyzed assert( @@ -591,7 +591,7 @@ class JsonSuite extends QueryTest { val schema = StructType(StructField("a", LongType, true) :: Nil) val logicalRelation = - jsonFile(path, schema).queryExecution.analyzed.asInstanceOf[LogicalRelation] + read.schema(schema).json(path).queryExecution.analyzed.asInstanceOf[LogicalRelation] val relationWithSchema = logicalRelation.relation.asInstanceOf[JSONRelation] assert(relationWithSchema.path === Some(path)) assert(relationWithSchema.schema === schema) @@ -603,7 +603,7 @@ class JsonSuite extends QueryTest { dir.delete() val path = dir.getCanonicalPath primitiveFieldAndType.map(record => record.replaceAll("\n", " ")).saveAsTextFile(path) - val jsonDF = jsonFile(path) + val jsonDF = read.json(path) val expectedSchema = StructType( StructField("bigInteger", DecimalType.Unlimited, true) :: @@ -672,7 +672,7 @@ class JsonSuite extends QueryTest { StructField("null", StringType, true) :: StructField("string", StringType, true) :: Nil) - val jsonDF1 = jsonFile(path, schema) + val jsonDF1 = read.schema(schema).json(path) assert(schema === jsonDF1.schema) @@ -689,7 +689,7 @@ class JsonSuite extends QueryTest { "this is a simple string.") ) - val jsonDF2 = jsonRDD(primitiveFieldAndType, schema) + val jsonDF2 = read.schema(schema).json(primitiveFieldAndType) assert(schema === jsonDF2.schema) @@ -710,7 +710,7 @@ class JsonSuite extends QueryTest { test("Applying schemas with MapType") { val schemaWithSimpleMap = StructType( StructField("map", MapType(StringType, IntegerType, true), false) :: Nil) - val jsonWithSimpleMap = jsonRDD(mapType1, schemaWithSimpleMap) + val jsonWithSimpleMap = read.schema(schemaWithSimpleMap).json(mapType1) jsonWithSimpleMap.registerTempTable("jsonWithSimpleMap") @@ -738,7 +738,7 @@ class JsonSuite extends QueryTest { val schemaWithComplexMap = StructType( StructField("map", MapType(StringType, innerStruct, true), false) :: Nil) - val jsonWithComplexMap = jsonRDD(mapType2, schemaWithComplexMap) + val jsonWithComplexMap = read.schema(schemaWithComplexMap).json(mapType2) 
jsonWithComplexMap.registerTempTable("jsonWithComplexMap") @@ -764,7 +764,7 @@ class JsonSuite extends QueryTest { } test("SPARK-2096 Correctly parse dot notations") { - val jsonDF = jsonRDD(complexFieldAndType2) + val jsonDF = read.json(complexFieldAndType2) jsonDF.registerTempTable("jsonTable") checkAnswer( @@ -782,7 +782,7 @@ class JsonSuite extends QueryTest { } test("SPARK-3390 Complex arrays") { - val jsonDF = jsonRDD(complexFieldAndType2) + val jsonDF = read.json(complexFieldAndType2) jsonDF.registerTempTable("jsonTable") checkAnswer( @@ -805,7 +805,7 @@ class JsonSuite extends QueryTest { } test("SPARK-3308 Read top level JSON arrays") { - val jsonDF = jsonRDD(jsonArray) + val jsonDF = read.json(jsonArray) jsonDF.registerTempTable("jsonTable") checkAnswer( @@ -826,7 +826,7 @@ class JsonSuite extends QueryTest { val oldColumnNameOfCorruptRecord = TestSQLContext.conf.columnNameOfCorruptRecord TestSQLContext.setConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD, "_unparsed") - val jsonDF = jsonRDD(corruptRecords) + val jsonDF = read.json(corruptRecords) jsonDF.registerTempTable("jsonTable") val schema = StructType( @@ -880,7 +880,7 @@ class JsonSuite extends QueryTest { } test("SPARK-4068: nulls in arrays") { - val jsonDF = jsonRDD(nullsInArrays) + val jsonDF = read.json(nullsInArrays) jsonDF.registerTempTable("jsonTable") val schema = StructType( @@ -957,8 +957,8 @@ class JsonSuite extends QueryTest { assert(result2(1) === "{\"f1\":{\"f11\":2,\"f12\":false},\"f2\":{\"B2\":null}}") assert(result2(3) === "{\"f1\":{\"f11\":4,\"f12\":true},\"f2\":{\"D4\":2147483644}}") - val jsonDF = jsonRDD(primitiveFieldAndType) - val primTable = jsonRDD(jsonDF.toJSON) + val jsonDF = read.json(primitiveFieldAndType) + val primTable = read.json(jsonDF.toJSON) primTable.registerTempTable("primativeTable") checkAnswer( sql("select * from primativeTable"), @@ -970,8 +970,8 @@ class JsonSuite extends QueryTest { "this is a simple string.") ) - val complexJsonDF = jsonRDD(complexFieldAndType1) - val compTable = jsonRDD(complexJsonDF.toJSON) + val complexJsonDF = read.json(complexFieldAndType1) + val compTable = read.json(complexJsonDF.toJSON) compTable.registerTempTable("complexTable") // Access elements of a primitive array. 
checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala index 5ad439584716f..bdc2ebabc5e9a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala @@ -328,12 +328,12 @@ class ParquetDataSourceOnFilterSuite extends ParquetFilterSuiteBase with BeforeA withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED -> "true") { withTempPath { dir => val path = s"${dir.getCanonicalPath}/part=1" - (1 to 3).map(i => (i, i.toString)).toDF("a", "b").saveAsParquetFile(path) + (1 to 3).map(i => (i, i.toString)).toDF("a", "b").write.parquet(path) // If the "part = 1" filter gets pushed down, this query will throw an exception since // "part" is not a valid column in the actual Parquet file checkAnswer( - sqlContext.parquetFile(path).filter("part = 1"), + sqlContext.read.parquet(path).filter("part = 1"), (1 to 3).map(i => Row(i, i.toString, 1))) } } @@ -357,7 +357,7 @@ class ParquetDataSourceOffFilterSuite extends ParquetFilterSuiteBase with Before withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED -> "true") { withTempPath { dir => val path = s"${dir.getCanonicalPath}/part=1" - (1 to 3).map(i => (i, i.toString)).toDF("a", "b").saveAsParquetFile(path) + (1 to 3).map(i => (i, i.toString)).toDF("a", "b").write.parquet(path) // If the "part = 1" filter gets pushed down, this query will throw an exception since // "part" is not a valid column in the actual Parquet file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala index 008443df216aa..dd48bb350f26d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala @@ -114,24 +114,24 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { for ((precision, scale) <- Seq((5, 2), (1, 0), (1, 1), (18, 10), (18, 17))) { withTempPath { dir => val data = makeDecimalRDD(DecimalType(precision, scale)) - data.saveAsParquetFile(dir.getCanonicalPath) - checkAnswer(parquetFile(dir.getCanonicalPath), data.collect().toSeq) + data.write.parquet(dir.getCanonicalPath) + checkAnswer(read.parquet(dir.getCanonicalPath), data.collect().toSeq) } } // Decimals with precision above 18 are not yet supported intercept[Throwable] { withTempPath { dir => - makeDecimalRDD(DecimalType(19, 10)).saveAsParquetFile(dir.getCanonicalPath) - parquetFile(dir.getCanonicalPath).collect() + makeDecimalRDD(DecimalType(19, 10)).write.parquet(dir.getCanonicalPath) + read.parquet(dir.getCanonicalPath).collect() } } // Unlimited-length decimals are not yet supported intercept[Throwable] { withTempPath { dir => - makeDecimalRDD(DecimalType.Unlimited).saveAsParquetFile(dir.getCanonicalPath) - parquetFile(dir.getCanonicalPath).collect() + makeDecimalRDD(DecimalType.Unlimited).write.parquet(dir.getCanonicalPath) + read.parquet(dir.getCanonicalPath).collect() } } } @@ -146,8 +146,8 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { withTempPath { dir => val data = makeDateRDD() - data.saveAsParquetFile(dir.getCanonicalPath) - checkAnswer(parquetFile(dir.getCanonicalPath), data.collect().toSeq) + data.write.parquet(dir.getCanonicalPath) + checkAnswer(read.parquet(dir.getCanonicalPath), data.collect().toSeq) } } @@ -283,7 +283,7 @@ class 
ParquetIOSuiteBase extends QueryTest with ParquetTest { withTempDir { dir => val path = new Path(dir.toURI.toString, "part-r-0.parquet") makeRawParquetFile(path) - checkAnswer(parquetFile(path.toString), (0 until 10).map { i => + checkAnswer(read.parquet(path.toString), (0 until 10).map { i => Row(i % 2 == 0, i, i.toLong, i.toFloat, i.toDouble) }) } @@ -311,8 +311,8 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { test("save - overwrite") { withParquetFile((1 to 10).map(i => (i, i.toString))) { file => val newData = (11 to 20).map(i => (i, i.toString)) - newData.toDF().save("org.apache.spark.sql.parquet", SaveMode.Overwrite, Map("path" -> file)) - checkAnswer(parquetFile(file), newData.map(Row.fromTuple)) + newData.toDF().write.format("parquet").mode(SaveMode.Overwrite).save(file) + checkAnswer(read.parquet(file), newData.map(Row.fromTuple)) } } @@ -320,8 +320,8 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { val data = (1 to 10).map(i => (i, i.toString)) withParquetFile(data) { file => val newData = (11 to 20).map(i => (i, i.toString)) - newData.toDF().save("org.apache.spark.sql.parquet", SaveMode.Ignore, Map("path" -> file)) - checkAnswer(parquetFile(file), data.map(Row.fromTuple)) + newData.toDF().write.format("parquet").mode(SaveMode.Ignore).save(file) + checkAnswer(read.parquet(file), data.map(Row.fromTuple)) } } @@ -330,8 +330,7 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { withParquetFile(data) { file => val newData = (11 to 20).map(i => (i, i.toString)) val errorMessage = intercept[Throwable] { - newData.toDF().save( - "org.apache.spark.sql.parquet", SaveMode.ErrorIfExists, Map("path" -> file)) + newData.toDF().write.format("parquet").mode(SaveMode.ErrorIfExists).save(file) }.getMessage assert(errorMessage.contains("already exists")) } @@ -341,8 +340,8 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { val data = (1 to 10).map(i => (i, i.toString)) withParquetFile(data) { file => val newData = (11 to 20).map(i => (i, i.toString)) - newData.toDF().save("org.apache.spark.sql.parquet", SaveMode.Append, Map("path" -> file)) - checkAnswer(parquetFile(file), (data ++ newData).map(Row.fromTuple)) + newData.toDF().write.format("parquet").mode(SaveMode.Append).save(file) + checkAnswer(read.parquet(file), (data ++ newData).map(Row.fromTuple)) } } @@ -374,7 +373,7 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { path, new Footer(path, new ParquetMetadata(fileMetadata, Nil)) :: Nil) - assertResult(parquetFile(path.toString).schema) { + assertResult(read.parquet(path.toString).schema) { StructType( StructField("a", BooleanType, nullable = false) :: StructField("b", IntegerType, nullable = false) :: @@ -392,7 +391,7 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { sqlContext.udf.register("div0", (x: Int) => x / 0) withTempPath { dir => intercept[org.apache.spark.SparkException] { - sqlContext.sql("select div0(1)").saveAsParquetFile(dir.getCanonicalPath) + sqlContext.sql("select div0(1)").write.parquet(dir.getCanonicalPath) } val path = new Path(dir.getCanonicalPath, "_temporary") val fs = path.getFileSystem(configuration) @@ -421,10 +420,10 @@ class ParquetDataSourceOnIOSuite extends ParquetIOSuiteBase with BeforeAndAfterA // In 1.3.0, save to fs other than file: without configuring core-site.xml would get: // IllegalArgumentException: Wrong FS: hdfs://..., expected: file:/// intercept[Throwable] { - sqlContext.parquetFile("file:///nonexistent") + sqlContext.read.parquet("file:///nonexistent") } val 
errorMessage = intercept[Throwable] { - sqlContext.parquetFile("hdfs://nonexistent") + sqlContext.read.parquet("hdfs://nonexistent") }.toString assert(errorMessage.contains("UnknownHostException")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala index 138e19766dc88..8079c460713da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala @@ -155,7 +155,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - parquetFile(base.getCanonicalPath).registerTempTable("t") + read.parquet(base.getCanonicalPath).registerTempTable("t") withTempTable("t") { checkAnswer( @@ -202,7 +202,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - parquetFile(base.getCanonicalPath).registerTempTable("t") + read.parquet(base.getCanonicalPath).registerTempTable("t") withTempTable("t") { checkAnswer( @@ -250,10 +250,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - val parquetRelation = load( - "org.apache.spark.sql.parquet", - Map("path" -> base.getCanonicalPath)) - + val parquetRelation = read.format("org.apache.spark.sql.parquet").load(base.getCanonicalPath) parquetRelation.registerTempTable("t") withTempTable("t") { @@ -293,10 +290,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - val parquetRelation = load( - "org.apache.spark.sql.parquet", - Map("path" -> base.getCanonicalPath)) - + val parquetRelation = read.format("org.apache.spark.sql.parquet").load(base.getCanonicalPath) parquetRelation.registerTempTable("t") withTempTable("t") { @@ -328,7 +322,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { (1 to 10).map(i => (i, i.toString)).toDF("intField", "stringField"), makePartitionDir(base, defaultPartitionName, "pi" -> 2)) - load(base.getCanonicalPath, "org.apache.spark.sql.parquet").registerTempTable("t") + read.format("org.apache.spark.sql.parquet").load(base.getCanonicalPath).registerTempTable("t") withTempTable("t") { checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 4e54b2eb8df7a..d2d1011b8e917 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -33,7 +33,7 @@ class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll { override def beforeAll(): Unit = { path = Utils.createTempDir() val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - jsonRDD(rdd).registerTempTable("jt") + read.json(rdd).registerTempTable("jt") } override def afterAll(): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index d1d427e1790bd..6f375ef36237d 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -33,7 +33,7 @@ class InsertSuite extends DataSourceTest with BeforeAndAfterAll { override def beforeAll: Unit = { path = Utils.createTempDir() val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - jsonRDD(rdd).registerTempTable("jt") + read.json(rdd).registerTempTable("jt") sql( s""" |CREATE TEMPORARY TABLE jsonTable (a int, b string) @@ -109,7 +109,7 @@ class InsertSuite extends DataSourceTest with BeforeAndAfterAll { // Writing the table to less part files. val rdd1 = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}"""), 5) - jsonRDD(rdd1).registerTempTable("jt1") + read.json(rdd1).registerTempTable("jt1") sql( s""" |INSERT OVERWRITE TABLE jsonTable SELECT a, b FROM jt1 @@ -121,7 +121,7 @@ class InsertSuite extends DataSourceTest with BeforeAndAfterAll { // Writing the table to more part files. val rdd2 = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}"""), 10) - jsonRDD(rdd2).registerTempTable("jt2") + read.json(rdd2).registerTempTable("jt2") sql( s""" |INSERT OVERWRITE TABLE jsonTable SELECT a, b FROM jt2 @@ -154,13 +154,13 @@ class InsertSuite extends DataSourceTest with BeforeAndAfterAll { } test("save directly to the path of a JSON table") { - table("jt").selectExpr("a * 5 as a", "b").save(path.toString, "json", SaveMode.Overwrite) + table("jt").selectExpr("a * 5 as a", "b").write.mode(SaveMode.Overwrite).json(path.toString) checkAnswer( sql("SELECT a, b FROM jsonTable"), (1 to 10).map(i => Row(i * 5, s"str$i")) ) - table("jt").save(path.toString, "json", SaveMode.Overwrite) + table("jt").write.mode(SaveMode.Overwrite).json(path.toString) checkAnswer( sql("SELECT a, b FROM jsonTable"), (1 to 10).map(i => Row(i, s"str$i")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala index 6567d1acd7644..7a28e9af3673c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala @@ -42,7 +42,7 @@ class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { path.delete() val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - df = jsonRDD(rdd) + df = read.json(rdd) df.registerTempTable("jsonTable") } @@ -57,41 +57,41 @@ class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { def checkLoad(): Unit = { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") - checkAnswer(load(path.toString), df.collect()) + checkAnswer(read.load(path.toString), df.collect()) // Test if we can pick up the data source name passed in load. 
conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - checkAnswer(load(path.toString, "org.apache.spark.sql.json"), df.collect()) - checkAnswer(load("org.apache.spark.sql.json", Map("path" -> path.toString)), df.collect()) + checkAnswer(read.format("json").load(path.toString), df.collect()) + checkAnswer(read.format("json").load(path.toString), df.collect()) val schema = StructType(StructField("b", StringType, true) :: Nil) checkAnswer( - load("org.apache.spark.sql.json", schema, Map("path" -> path.toString)), + read.format("json").schema(schema).load(path.toString), sql("SELECT b FROM jsonTable").collect()) } test("save with path and load") { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") - df.save(path.toString) + df.write.save(path.toString) checkLoad() } test("save with path and datasource, and load") { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - df.save(path.toString, "org.apache.spark.sql.json") + df.write.json(path.toString) checkLoad() } test("save with data source and options, and load") { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - df.save("org.apache.spark.sql.json", SaveMode.ErrorIfExists, Map("path" -> path.toString)) + df.write.mode(SaveMode.ErrorIfExists).json(path.toString) checkLoad() } test("save and save again") { - df.save(path.toString, "org.apache.spark.sql.json") + df.write.json(path.toString) var message = intercept[RuntimeException] { - df.save(path.toString, "org.apache.spark.sql.json") + df.write.json(path.toString) }.getMessage assert( @@ -100,14 +100,14 @@ class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { if (path.exists()) Utils.deleteRecursively(path) - df.save(path.toString, "org.apache.spark.sql.json") + df.write.json(path.toString) checkLoad() - df.save("org.apache.spark.sql.json", SaveMode.Overwrite, Map("path" -> path.toString)) + df.write.mode(SaveMode.Overwrite).json(path.toString) checkLoad() message = intercept[RuntimeException] { - df.save("org.apache.spark.sql.json", SaveMode.Append, Map("path" -> path.toString)) + df.write.mode(SaveMode.Append).json(path.toString) }.getMessage assert( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index d46a127d47d31..c6b65106452bf 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -140,7 +140,7 @@ private[hive] trait HiveStrategies { PhysicalRDD(plan.output, sparkContext.emptyRDD[Row]) :: Nil } else { hiveContext - .parquetFile(partitionLocations: _*) + .read.parquet(partitionLocations: _*) .addPartitioningAttributes(relation.partitionKeys) .lowerCase .where(unresolvedOtherPredicates) @@ -152,7 +152,7 @@ private[hive] trait HiveStrategies { } else { hiveContext - .parquetFile(relation.hiveQlTable.getDataLocation.toString) + .read.parquet(relation.hiveQlTable.getDataLocation.toString) .lowerCase .where(unresolvedOtherPredicates) .select(unresolvedProjection: _*) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala index 7ff5719adb3ab..5a5ea10e3c82e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala @@ -55,8 +55,8 @@ class HiveParquetSuite extends QueryTest with 
ParquetTest { test(s"$prefix: Converting Hive to Parquet Table via saveAsParquetFile") { withTempPath { dir => - sql("SELECT * FROM src").saveAsParquetFile(dir.getCanonicalPath) - parquetFile(dir.getCanonicalPath).registerTempTable("p") + sql("SELECT * FROM src").write.parquet(dir.getCanonicalPath) + read.parquet(dir.getCanonicalPath).registerTempTable("p") withTempTable("p") { checkAnswer( sql("SELECT * FROM src ORDER BY key"), @@ -68,8 +68,8 @@ class HiveParquetSuite extends QueryTest with ParquetTest { test(s"$prefix: INSERT OVERWRITE TABLE Parquet table") { withParquetTable((1 to 10).map(i => (i, s"val_$i")), "t") { withTempPath { file => - sql("SELECT * FROM t LIMIT 1").saveAsParquetFile(file.getCanonicalPath) - parquetFile(file.getCanonicalPath).registerTempTable("p") + sql("SELECT * FROM t LIMIT 1").write.parquet(file.getCanonicalPath) + read.parquet(file.getCanonicalPath).registerTempTable("p") withTempTable("p") { // let's do three overwrites for good measure sql("INSERT OVERWRITE TABLE p SELECT * FROM t") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 1bf1c1be3e3d3..58b0b80c31e2e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -60,7 +60,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { checkAnswer( sql("SELECT * FROM jsonTable"), - jsonFile(filePath).collect().toSeq) + read.json(filePath).collect().toSeq) } test ("persistent JSON table with a user specified schema") { @@ -77,7 +77,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { |) """.stripMargin) - jsonFile(filePath).registerTempTable("expectedJsonTable") + read.json(filePath).registerTempTable("expectedJsonTable") checkAnswer( sql("SELECT a, b, `c_!@(3)`, `<d>`.`d!`, `<d>`.`=` FROM jsonTable"), @@ -104,7 +104,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { assert(expectedSchema === table("jsonTable").schema) - jsonFile(filePath).registerTempTable("expectedJsonTable") + read.json(filePath).registerTempTable("expectedJsonTable") checkAnswer( sql("SELECT b, `<d>`.`=` FROM jsonTable"), @@ -123,7 +123,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { checkAnswer( sql("SELECT * FROM jsonTable"), - jsonFile(filePath).collect().toSeq) + read.json(filePath).collect().toSeq) } test("drop table") { @@ -138,7 +138,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { checkAnswer( sql("SELECT * FROM jsonTable"), - jsonFile(filePath).collect().toSeq) + read.json(filePath).collect().toSeq) sql("DROP TABLE jsonTable") @@ -241,7 +241,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { |) """.stripMargin) - jsonFile(filePath).registerTempTable("expectedJsonTable") + read.json(filePath).registerTempTable("expectedJsonTable") checkAnswer( sql("SELECT * FROM jsonTable"), @@ -474,7 +474,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { // Drop table will also delete the data. sql("DROP TABLE savedJsonTable") intercept[InvalidInputException] { - jsonFile(catalog.hiveDefaultTableFilePath("savedJsonTable")) + read.json(catalog.hiveDefaultTableFilePath("savedJsonTable")) } // Create an external table by specifying the path. 
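A minimal sketch of creating an external table from a path, assuming a SQLContext (or HiveContext) `sqlContext` and a JSON dataset at `tempPath`; the exact calls used by the test fall outside this hunk:
{{{
// Register an external table backed by the files at the path; dropping it should keep the data.
sqlContext.createExternalTable("createdJsonTable", "json", Map("path" -> tempPath.toString))
}}}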
@@ -491,7 +491,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { // Data should not be deleted after we drop the table. sql("DROP TABLE savedJsonTable") checkAnswer( - jsonFile(tempPath.toString), + read.json(tempPath.toString), df.collect()) conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, originalDefaultSource) @@ -526,7 +526,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { // Data should not be deleted. sql("DROP TABLE createdJsonTable") checkAnswer( - jsonFile(tempPath.toString), + read.json(tempPath.toString), df.collect()) // Try to specify the schema. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index b6be09e2f8837..a0075f1e44ca8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -688,11 +688,11 @@ class ParquetDataSourceOnSourceSuite extends ParquetSourceSuiteBase { val df = Seq(1,2,3).map(i => (i, i.toString)).toDF("int", "str") val df2 = df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("y.str").max("y.int") - intercept[Throwable](df2.saveAsParquetFile(filePath)) + intercept[Throwable](df2.write.parquet(filePath)) val df3 = df2.toDF("str", "max_int") - df3.saveAsParquetFile(filePath2) - val df4 = parquetFile(filePath2) + df3.write.parquet(filePath2) + val df4 = read.parquet(filePath2) checkAnswer(df4, Row("1", 1) :: Row("2", 2) :: Row("3", 3) :: Nil) assert(df4.columns === Array("str", "max_int")) } @@ -731,14 +731,14 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll sparkContext.makeRDD(1 to 10) .map(i => ParquetData(i, s"part-$p")) .toDF() - .saveAsParquetFile(partDir.getCanonicalPath) + .write.parquet(partDir.getCanonicalPath) } sparkContext .makeRDD(1 to 10) .map(i => ParquetData(i, s"part-1")) .toDF() - .saveAsParquetFile(new File(normalTableDir, "normal").getCanonicalPath) + .write.parquet(new File(normalTableDir, "normal").getCanonicalPath) partitionedTableDirWithKey = Utils.createTempDir() @@ -747,7 +747,7 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll sparkContext.makeRDD(1 to 10) .map(i => ParquetDataWithKey(p, i, s"part-$p")) .toDF() - .saveAsParquetFile(partDir.getCanonicalPath) + .write.parquet(partDir.getCanonicalPath) } partitionedTableDirWithKeyAndComplexTypes = Utils.createTempDir() @@ -757,7 +757,7 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll sparkContext.makeRDD(1 to 10).map { i => ParquetDataWithKeyAndComplexTypes( p, i, s"part-$p", StructContainer(i, f"${i}_string"), 1 to i) - }.toDF().saveAsParquetFile(partDir.getCanonicalPath) + }.toDF().write.parquet(partDir.getCanonicalPath) } partitionedTableDirWithComplexTypes = Utils.createTempDir() @@ -766,7 +766,7 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll val partDir = new File(partitionedTableDirWithComplexTypes, s"p=$p") sparkContext.makeRDD(1 to 10).map { i => ParquetDataWithComplexTypes(i, s"part-$p", StructContainer(i, f"${i}_string"), 1 to i) - }.toDF().saveAsParquetFile(partDir.getCanonicalPath) + }.toDF().write.parquet(partDir.getCanonicalPath) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala index cf6afd25ae5a0..f44b3c521e647 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala @@ -92,44 +92,27 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { test("save()/load() - non-partitioned table - Overwrite") { withTempPath { file => - testDF.save( - path = file.getCanonicalPath, - source = dataSourceName, - mode = SaveMode.Overwrite) - - testDF.save( - path = file.getCanonicalPath, - source = dataSourceName, - mode = SaveMode.Overwrite) + testDF.write.mode(SaveMode.Overwrite).format(dataSourceName).save(file.getCanonicalPath) + testDF.write.mode(SaveMode.Overwrite).format(dataSourceName).save(file.getCanonicalPath) checkAnswer( - load( - source = dataSourceName, - options = Map( - "path" -> file.getCanonicalPath, - "dataSchema" -> dataSchema.json)), + read.format(dataSourceName) + .option("path", file.getCanonicalPath) + .option("dataSchema", dataSchema.json) + .load(), testDF.collect()) } } test("save()/load() - non-partitioned table - Append") { withTempPath { file => - testDF.save( - path = file.getCanonicalPath, - source = dataSourceName, - mode = SaveMode.Overwrite) - - testDF.save( - path = file.getCanonicalPath, - source = dataSourceName, - mode = SaveMode.Append) + testDF.write.mode(SaveMode.Overwrite).format(dataSourceName).save(file.getCanonicalPath) + testDF.write.mode(SaveMode.Append).format(dataSourceName).save(file.getCanonicalPath) checkAnswer( - load( - source = dataSourceName, - options = Map( - "path" -> file.getCanonicalPath, - "dataSchema" -> dataSchema.json)).orderBy("a"), + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath).orderBy("a"), testDF.unionAll(testDF).orderBy("a").collect()) } } @@ -147,10 +130,7 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { test("save()/load() - non-partitioned table - Ignore") { withTempDir { file => - testDF.save( - path = file.getCanonicalPath, - source = dataSourceName, - mode = SaveMode.Ignore) + testDF.write.mode(SaveMode.Ignore).format(dataSourceName).save(file.getCanonicalPath) val path = new Path(file.getCanonicalPath) val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) @@ -160,89 +140,81 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { test("save()/load() - partitioned table - simple queries") { withTempPath { file => - partitionedTestDF.save( - source = dataSourceName, - mode = SaveMode.ErrorIfExists, - options = Map("path" -> file.getCanonicalPath), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.ErrorIfExists) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) checkQueries( - load( - source = dataSourceName, - options = Map( - "path" -> file.getCanonicalPath, - "dataSchema" -> dataSchema.json))) + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath)) } } test("save()/load() - partitioned table - Overwrite") { withTempPath { file => - partitionedTestDF.save( - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("path" -> file.getCanonicalPath), - partitionColumns = Seq("p1", "p2")) - - partitionedTestDF.save( - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("path" -> file.getCanonicalPath), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + 
partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) checkAnswer( - load( - source = dataSourceName, - options = Map( - "path" -> file.getCanonicalPath, - "dataSchema" -> dataSchema.json)), + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath), partitionedTestDF.collect()) } } test("save()/load() - partitioned table - Append") { withTempPath { file => - partitionedTestDF.save( - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("path" -> file.getCanonicalPath), - partitionColumns = Seq("p1", "p2")) - - partitionedTestDF.save( - source = dataSourceName, - mode = SaveMode.Append, - options = Map("path" -> file.getCanonicalPath), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Append) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) checkAnswer( - load( - source = dataSourceName, - options = Map( - "path" -> file.getCanonicalPath, - "dataSchema" -> dataSchema.json)), + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath), partitionedTestDF.unionAll(partitionedTestDF).collect()) } } test("save()/load() - partitioned table - Append - new partition values") { withTempPath { file => - partitionedTestDF1.save( - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("path" -> file.getCanonicalPath), - partitionColumns = Seq("p1", "p2")) - - partitionedTestDF2.save( - source = dataSourceName, - mode = SaveMode.Append, - options = Map("path" -> file.getCanonicalPath), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF1.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + partitionedTestDF2.write + .format(dataSourceName) + .mode(SaveMode.Append) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) checkAnswer( - load( - source = dataSourceName, - options = Map( - "path" -> file.getCanonicalPath, - "dataSchema" -> dataSchema.json)), + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath), partitionedTestDF.collect()) } } @@ -250,11 +222,11 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { test("save()/load() - partitioned table - ErrorIfExists") { withTempDir { file => intercept[RuntimeException] { - partitionedTestDF.save( - source = dataSourceName, - mode = SaveMode.ErrorIfExists, - options = Map("path" -> file.getCanonicalPath), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.ErrorIfExists) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) } } } @@ -343,19 +315,19 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { } test("saveAsTable()/load() - partitioned table - Overwrite") { - partitionedTestDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1", "p2")) - - partitionedTestDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + 
.option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") withTable("t") { checkAnswer(table("t"), partitionedTestDF.collect()) @@ -363,19 +335,19 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { } test("saveAsTable()/load() - partitioned table - Append") { - partitionedTestDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1", "p2")) - - partitionedTestDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Append, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Append) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") withTable("t") { checkAnswer(table("t"), partitionedTestDF.unionAll(partitionedTestDF).collect()) @@ -383,19 +355,19 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { } test("saveAsTable()/load() - partitioned table - Append - new partition values") { - partitionedTestDF1.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1", "p2")) - - partitionedTestDF2.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Append, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF1.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + partitionedTestDF2.write + .format(dataSourceName) + .mode(SaveMode.Append) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") withTable("t") { checkAnswer(table("t"), partitionedTestDF.collect()) @@ -403,31 +375,31 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { } test("saveAsTable()/load() - partitioned table - Append - mismatched partition columns") { - partitionedTestDF1.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF1.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") // Using only a subset of all partition columns intercept[Throwable] { - partitionedTestDF2.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Append, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1")) + partitionedTestDF2.write + .format(dataSourceName) + .mode(SaveMode.Append) + .option("dataSchema", dataSchema.json) + .partitionBy("p1") + .saveAsTable("t") } // Using different order of partition columns intercept[Throwable] { - partitionedTestDF2.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Append, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p2", "p1")) + partitionedTestDF2.write + .format(dataSourceName) + 
.mode(SaveMode.Append) + .option("dataSchema", dataSchema.json) + .partitionBy("p2", "p1") + .saveAsTable("t") } } @@ -436,12 +408,12 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { withTempTable("t") { intercept[AnalysisException] { - partitionedTestDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.ErrorIfExists, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.ErrorIfExists) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") } } } @@ -450,12 +422,12 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { Seq.empty[(Int, String)].toDF().registerTempTable("t") withTempTable("t") { - partitionedTestDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Ignore, - options = Map("dataSchema" -> dataSchema.json), - partitionColumns = Seq("p1", "p2")) + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Ignore) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") assert(table("t").collect().isEmpty) } @@ -463,17 +435,16 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { test("Hadoop style globbing") { withTempPath { file => - partitionedTestDF.save( - source = dataSourceName, - mode = SaveMode.Overwrite, - options = Map("path" -> file.getCanonicalPath), - partitionColumns = Seq("p1", "p2")) - - val df = load( - source = dataSourceName, - options = Map( - "path" -> s"${file.getCanonicalPath}/p1=*/p2=???", - "dataSchema" -> dataSchema.json)) + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + val df = read + .format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(s"${file.getCanonicalPath}/p1=*/p2=???") val expectedPaths = Set( s"${file.getCanonicalFile}/p1=1/p2=foo", From d41ae4344c07064de03a120804830886e1614d92 Mon Sep 17 00:00:00 2001 From: FavioVazquez <favio.vazquezp@gmail.com> Date: Sat, 16 May 2015 08:07:03 +0100 Subject: [PATCH 206/320] [SPARK-7671] Fix wrong URLs in MLlib Data Types Documentation There is a mistake in the URL of Matrices in the MLlib Data Types documentation (Local matrix scala section), the URL points to https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.mllib.linalg.Matrices which is a mistake, since Matrices is an object that implements factory methods for Matrix that does not have a companion class. The correct link should point to https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.mllib.linalg.Matrices$ There is another mistake, in the Local Vector section in Scala, Java and Python In the Scala section the URL of Vectors points to the trait Vector (https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.mllib.linalg.Vector) and not to the factory methods implemented in Vectors. 
The correct link should be: https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.mllib.linalg.Vectors$ In the Java section the URL of Vectors points to the Interface Vector (https://spark.apache.org/docs/latest/api/java/org/apache/spark/mllib/linalg/Vector.html) and not to the Class Vectors The correct link should be: https://spark.apache.org/docs/latest/api/java/org/apache/spark/mllib/linalg/Vectors.html In the Python section the URL of Vectors points to the class Vector (https://spark.apache.org/docs/latest/api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vector) and not the Class Vectors The correct link should be: https://spark.apache.org/docs/latest/api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vectors Author: FavioVazquez <favio.vazquezp@gmail.com> Closes #6196 from FavioVazquez/fix-typo-matrices-mllib-datatypes and squashes the following commits: 3e9efd5 [FavioVazquez] - Fixed wrong URLs in the MLlib Data Types Documentation 9af7074 [FavioVazquez] Merge remote-tracking branch 'upstream/master' edab1ef [FavioVazquez] Merge remote-tracking branch 'upstream/master' b2e2f8c [FavioVazquez] Merge remote-tracking branch 'upstream/master' --- docs/mllib-data-types.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index 4f2a2f71048f7..acec0426dc69b 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -31,7 +31,7 @@ The base class of local vectors is implementations: [`DenseVector`](api/scala/index.html#org.apache.spark.mllib.linalg.DenseVector) and [`SparseVector`](api/scala/index.html#org.apache.spark.mllib.linalg.SparseVector). We recommend using the factory methods implemented in -[`Vectors`](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) to create local vectors. +[`Vectors`](api/scala/index.html#org.apache.spark.mllib.linalg.Vectors$) to create local vectors. {% highlight scala %} import org.apache.spark.mllib.linalg.{Vector, Vectors} @@ -57,7 +57,7 @@ The base class of local vectors is implementations: [`DenseVector`](api/java/org/apache/spark/mllib/linalg/DenseVector.html) and [`SparseVector`](api/java/org/apache/spark/mllib/linalg/SparseVector.html). We recommend using the factory methods implemented in -[`Vectors`](api/java/org/apache/spark/mllib/linalg/Vector.html) to create local vectors. +[`Vectors`](api/java/org/apache/spark/mllib/linalg/Vectors.html) to create local vectors. {% highlight java %} import org.apache.spark.mllib.linalg.Vector; @@ -84,7 +84,7 @@ and the following as sparse vectors: with a single column We recommend using NumPy arrays over lists for efficiency, and using the factory methods implemented -in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vector) to create sparse vectors. +in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vectors) to create sparse vectors. {% highlight python %} import numpy as np @@ -241,7 +241,7 @@ The base class of local matrices is [`Matrix`](api/scala/index.html#org.apache.spark.mllib.linalg.Matrix), and we provide one implementation: [`DenseMatrix`](api/scala/index.html#org.apache.spark.mllib.linalg.DenseMatrix). We recommend using the factory methods implemented -in [`Matrices`](api/scala/index.html#org.apache.spark.mllib.linalg.Matrices) to create local +in [`Matrices`](api/scala/index.html#org.apache.spark.mllib.linalg.Matrices$) to create local matrices. 
{% highlight scala %} From 1fd33815f47478f5f2e8b55b90757819b8cb5247 Mon Sep 17 00:00:00 2001 From: Sean Owen <sowen@cloudera.com> Date: Sat, 16 May 2015 08:18:41 +0100 Subject: [PATCH 207/320] [SPARK-4556] [BUILD] binary distribution assembly can't run in local mode Add note on building a runnable distribution with make-distribution.sh Author: Sean Owen <sowen@cloudera.com> Closes #6186 from srowen/SPARK-4556 and squashes the following commits: 4002966 [Sean Owen] Add pointer to --help flag 9fa7883 [Sean Owen] Add note on building a runnable distribution with make-distribution.sh --- docs/building-spark.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/building-spark.md b/docs/building-spark.md index 6e310ff424784..4dbccb9e6e46c 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -34,6 +34,16 @@ and in `project/SparkBuild.scala` add: to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/pull/2883/files) if you are unsure of where to add these lines. +# Building a Runnable Distribution + +To create a Spark distribution like those distributed by the +[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as +to be runnable, use `make-distribution.sh` in the project root directory. It can be configured +with Maven profile settings and so on like the direct Maven build. Example: + + ./make-distribution.sh --name custom-spark --tgz -Phadoop-2.4 -Pyarn + +For more information on usage, run `./make-distribution.sh --help` # Setting up Maven's Memory Usage From 0ac8b01a07840f199bbc79fb845762284aead6de Mon Sep 17 00:00:00 2001 From: Nishkam Ravi <nravi@cloudera.com> Date: Sat, 16 May 2015 08:24:21 +0100 Subject: [PATCH 208/320] [SPARK-7672] [CORE] Use int conversion in translating kryoserializer.buffer.mb to kryoserializer.buffer In translating spark.kryoserializer.buffer.mb to spark.kryoserializer.buffer, use of toDouble will lead to "Fractional values not supported" error even when spark.kryoserializer.buffer.mb is an integer. 
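A hypothetical standalone helper (not the SparkConf code itself; the real change is the one-line translation shown in the diff below) illustrating why the int conversion matters: interpolating the Double directly turns an integer megabyte setting such as "64" into "64000.0k", which the byte-size parser rejects as fractional, while truncating to an Int first yields a plain "64000k" and still handles fractional inputs such as "1.1".

object KryoBufferTranslationSketch {
  // The legacy value is interpreted as megabytes; the new key expects a
  // byte-size string such as "64000k", and the number must be integral.
  def buggy(legacyMb: String): String = s"${legacyMb.toDouble * 1000}k"         // "64"  -> "64000.0k" (rejected)
  def fixed(legacyMb: String): String = s"${(legacyMb.toDouble * 1000).toInt}k" // "64"  -> "64000k"

  def main(args: Array[String]): Unit = {
    println(buggy("64"))   // 64000.0k
    println(fixed("64"))   // 64000k
    println(fixed("1.1"))  // 1100k
  }
}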
ilganeli, andrewor14 Author: Nishkam Ravi <nravi@cloudera.com> Author: nishkamravi2 <nishkamravi@gmail.com> Author: nravi <nravi@c1704.halxg.cloudera.com> Closes #6198 from nishkamravi2/master_nravi and squashes the following commits: 171a53c [nishkamravi2] Update SparkConfSuite.scala 5261bf6 [Nishkam Ravi] Add a test for deprecated config spark.kryoserializer.buffer.mb 5190f79 [Nishkam Ravi] In translating from deprecated spark.kryoserializer.buffer.mb to spark.kryoserializer.buffer use int conversion since fractions are not permissible 059ce82 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi eaa13b5 [nishkamravi2] Update Client.scala 981afd2 [Nishkam Ravi] Check for read permission before initiating copy 1b81383 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 0f1abd0 [nishkamravi2] Update Utils.scala 474e3bf [nishkamravi2] Update DiskBlockManager.scala 97c383e [nishkamravi2] Update Utils.scala 8691e0c [Nishkam Ravi] Add a try/catch block around Utils.removeShutdownHook 2be1e76 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 1c13b79 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi bad4349 [nishkamravi2] Update Main.java 36a6f87 [Nishkam Ravi] Minor changes and bug fixes b7f4ae7 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 4a45d6a [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 458af39 [Nishkam Ravi] Locate the jar using getLocation, obviates the need to pass assembly path as an argument d9658d6 [Nishkam Ravi] Changes for SPARK-6406 ccdc334 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 3faa7a4 [Nishkam Ravi] Launcher library changes (SPARK-6406) 345206a [Nishkam Ravi] spark-class merge Merge branch 'master_nravi' of https://github.com/nishkamravi2/spark into master_nravi ac58975 [Nishkam Ravi] spark-class changes 06bfeb0 [nishkamravi2] Update spark-class 35af990 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 32c3ab3 [nishkamravi2] Update AbstractCommandBuilder.java 4bd4489 [nishkamravi2] Update AbstractCommandBuilder.java 746f35b [Nishkam Ravi] "hadoop" string in the assembly name should not be mandatory (everywhere else in spark we mandate spark-assembly*hadoop*.jar) bfe96e0 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi ee902fa [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi d453197 [nishkamravi2] Update NewHadoopRDD.scala 6f41a1d [nishkamravi2] Update NewHadoopRDD.scala 0ce2c32 [nishkamravi2] Update HadoopRDD.scala f7e33c2 [Nishkam Ravi] Merge branch 'master_nravi' of https://github.com/nishkamravi2/spark into master_nravi ba1eb8b [Nishkam Ravi] Try-catch block around the two occurrences of removeShutDownHook. Deletion of semi-redundant occurrences of expensive operation inShutDown. 
71d0e17 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 494d8c0 [nishkamravi2] Update DiskBlockManager.scala 3c5ddba [nishkamravi2] Update DiskBlockManager.scala f0d12de [Nishkam Ravi] Workaround for IllegalStateException caused by recent changes to BlockManager.stop 79ea8b4 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi b446edc [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 5c9a4cb [nishkamravi2] Update TaskSetManagerSuite.scala 535295a [nishkamravi2] Update TaskSetManager.scala 3e1b616 [Nishkam Ravi] Modify test for maxResultSize 9f6583e [Nishkam Ravi] Changes to maxResultSize code (improve error message and add condition to check if maxResultSize > 0) 5f8f9ed [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi 636a9ff [nishkamravi2] Update YarnAllocator.scala 8f76c8b [Nishkam Ravi] Doc change for yarn memory overhead 35daa64 [Nishkam Ravi] Slight change in the doc for yarn memory overhead 5ac2ec1 [Nishkam Ravi] Remove out dac1047 [Nishkam Ravi] Additional documentation for yarn memory overhead issue 42c2c3d [Nishkam Ravi] Additional changes for yarn memory overhead issue 362da5e [Nishkam Ravi] Additional changes for yarn memory overhead c726bd9 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi f00fa31 [Nishkam Ravi] Improving logging for AM memoryOverhead 1cf2d1e [nishkamravi2] Update YarnAllocator.scala ebcde10 [Nishkam Ravi] Modify default YARN memory_overhead-- from an additive constant to a multiplier (redone to resolve merge conflicts) 2e69f11 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi efd688a [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark 2b630f9 [nravi] Accept memory input as "30g", "512M" instead of an int value, to be consistent with rest of Spark 3bf8fad [nravi] Merge branch 'master' of https://github.com/apache/spark 5423a03 [nravi] Merge branch 'master' of https://github.com/apache/spark eb663ca [nravi] Merge branch 'master' of https://github.com/apache/spark df2aeb1 [nravi] Improved fix for ConcurrentModificationIssue (Spark-1097, Hadoop-10456) 6b840f0 [nravi] Undo the fix for SPARK-1758 (the problem is fixed) 5108700 [nravi] Fix in Spark for the Concurrent thread modification issue (SPARK-1097, HADOOP-10456) 681b36f [nravi] Fix for SPARK-1758: failing test org.apache.spark.JavaAPISuite.wholeTextFiles --- core/src/main/scala/org/apache/spark/SparkConf.scala | 2 +- core/src/test/scala/org/apache/spark/SparkConfSuite.scala | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index a8fc90ad2050e..b5e5d6f1465f3 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -509,7 +509,7 @@ private[spark] object SparkConf extends Logging { AlternateConfig("spark.reducer.maxMbInFlight", "1.4")), "spark.kryoserializer.buffer" -> Seq(AlternateConfig("spark.kryoserializer.buffer.mb", "1.4", - translation = s => s"${s.toDouble * 1000}k")), + translation = s => s"${(s.toDouble * 1000).toInt}k")), "spark.kryoserializer.buffer.max" -> Seq( AlternateConfig("spark.kryoserializer.buffer.max.mb", "1.4")), "spark.shuffle.file.buffer" -> Seq( diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala 
b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala index 68d08e32f9aa4..fafa4ed606b08 100644 --- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala @@ -241,6 +241,9 @@ class SparkConfSuite extends FunSuite with LocalSparkContext with ResetSystemPro conf.set("spark.yarn.applicationMaster.waitTries", "42") assert(conf.getTimeAsSeconds("spark.yarn.am.waitTime") === 420) + + conf.set("spark.kryoserializer.buffer.mb", "1.1") + assert(conf.getSizeAsKb("spark.kryoserializer.buffer") === 1100) } test("akka deprecated configs") { From 47e7ffe36b8a8a246fe9af522aff480d19c0c8a6 Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Sat, 16 May 2015 00:44:29 -0700 Subject: [PATCH 209/320] [SPARK-7655][Core][SQL] Remove 'scala.concurrent.ExecutionContext.Implicits.global' in 'ask' and 'BroadcastHashJoin' Because both `AkkaRpcEndpointRef.ask` and `BroadcastHashJoin` uses `scala.concurrent.ExecutionContext.Implicits.global`. However, because the tasks in `BroadcastHashJoin` are usually long-running tasks, which will occupy all threads in `global`. Then `ask` cannot get a chance to process the replies. For `ask`, actually the tasks are very simple, so we can use `MoreExecutors.sameThreadExecutor()`. For `BroadcastHashJoin`, it's better to use `ThreadUtils.newDaemonCachedThreadPool`. Author: zsxwing <zsxwing@gmail.com> Closes #6200 from zsxwing/SPARK-7655-2 and squashes the following commits: cfdc605 [zsxwing] Remove redundant imort and minor doc fix cf83153 [zsxwing] Add "sameThread" and "newDaemonCachedThreadPool with maxThreadNumber" to ThreadUtils 08ad0ee [zsxwing] Remove 'scala.concurrent.ExecutionContext.Implicits.global' in 'ask' and 'BroadcastHashJoin' --- .../apache/spark/rpc/akka/AkkaRpcEnv.scala | 8 ++++--- .../org/apache/spark/util/ThreadUtils.scala | 24 ++++++++++++++++++- .../apache/spark/util/ThreadUtilsSuite.scala | 12 ++++++++++ .../execution/joins/BroadcastHashJoin.scala | 10 ++++++-- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala index ba0d468f111ef..0161962cde073 100644 --- a/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala @@ -29,9 +29,11 @@ import akka.actor.{ActorSystem, ExtendedActorSystem, Actor, ActorRef, Props, Add import akka.event.Logging.Error import akka.pattern.{ask => akkaAsk} import akka.remote.{AssociationEvent, AssociatedEvent, DisassociatedEvent, AssociationErrorEvent} +import com.google.common.util.concurrent.MoreExecutors + import org.apache.spark.{SparkException, Logging, SparkConf} import org.apache.spark.rpc._ -import org.apache.spark.util.{ActorLogReceive, AkkaUtils} +import org.apache.spark.util.{ActorLogReceive, AkkaUtils, ThreadUtils} /** * A RpcEnv implementation based on Akka. @@ -294,8 +296,8 @@ private[akka] class AkkaRpcEndpointRef( } override def ask[T: ClassTag](message: Any, timeout: FiniteDuration): Future[T] = { - import scala.concurrent.ExecutionContext.Implicits.global actorRef.ask(AkkaMessage(message, true))(timeout).flatMap { + // The function will run in the calling thread, so it should be short and never block. 
case msg @ AkkaMessage(message, reply) => if (reply) { logError(s"Receive $msg but the sender cannot reply") @@ -305,7 +307,7 @@ private[akka] class AkkaRpcEndpointRef( } case AkkaFailure(e) => Future.failed(e) - }.mapTo[T] + }(ThreadUtils.sameThread).mapTo[T] } override def toString: String = s"${getClass.getSimpleName}($actorRef)" diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala index 098a4b79496b2..ca5624a3d8b3d 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala @@ -20,10 +20,22 @@ package org.apache.spark.util import java.util.concurrent._ -import com.google.common.util.concurrent.ThreadFactoryBuilder +import scala.concurrent.{ExecutionContext, ExecutionContextExecutor} + +import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder} private[spark] object ThreadUtils { + private val sameThreadExecutionContext = + ExecutionContext.fromExecutorService(MoreExecutors.sameThreadExecutor()) + + /** + * An `ExecutionContextExecutor` that runs each task in the thread that invokes `execute/submit`. + * The caller should make sure the tasks running in this `ExecutionContextExecutor` are short and + * never block. + */ + def sameThread: ExecutionContextExecutor = sameThreadExecutionContext + /** * Create a thread factory that names threads with a prefix and also sets the threads to daemon. */ @@ -40,6 +52,16 @@ private[spark] object ThreadUtils { Executors.newCachedThreadPool(threadFactory).asInstanceOf[ThreadPoolExecutor] } + /** + * Create a cached thread pool whose max number of threads is `maxThreadNumber`. Thread names + * are formatted as prefix-ID, where ID is a unique, sequentially assigned integer. + */ + def newDaemonCachedThreadPool(prefix: String, maxThreadNumber: Int): ThreadPoolExecutor = { + val threadFactory = namedThreadFactory(prefix) + new ThreadPoolExecutor( + 0, maxThreadNumber, 60L, TimeUnit.SECONDS, new SynchronousQueue[Runnable], threadFactory) + } + /** * Wrapper over newFixedThreadPool. Thread names are formatted as prefix-ID, where ID is a * unique, sequentially assigned integer. 
diff --git a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala index a3aa3e953fbec..751d3df9cc8f7 100644 --- a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala @@ -20,6 +20,9 @@ package org.apache.spark.util import java.util.concurrent.{CountDownLatch, TimeUnit} +import scala.concurrent.{Await, Future} +import scala.concurrent.duration._ + import org.scalatest.FunSuite class ThreadUtilsSuite extends FunSuite { @@ -54,4 +57,13 @@ class ThreadUtilsSuite extends FunSuite { executor.shutdownNow() } } + + test("sameThread") { + val callerThreadName = Thread.currentThread().getName() + val f = Future { + Thread.currentThread().getName() + }(ThreadUtils.sameThread) + val futureThreadName = Await.result(f, 10.seconds) + assert(futureThreadName === callerThreadName) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala index 05dd5681edfac..fe43fc4125c8e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala @@ -18,10 +18,10 @@ package org.apache.spark.sql.execution.joins import org.apache.spark.rdd.RDD +import org.apache.spark.util.ThreadUtils import scala.concurrent._ import scala.concurrent.duration._ -import scala.concurrent.ExecutionContext.Implicits.global import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.catalyst.expressions.{Row, Expression} @@ -64,7 +64,7 @@ case class BroadcastHashJoin( val input: Array[Row] = buildPlan.execute().map(_.copy()).collect() val hashed = HashedRelation(input.iterator, buildSideKeyGenerator, input.length) sparkContext.broadcast(hashed) - } + }(BroadcastHashJoin.broadcastHashJoinExecutionContext) protected override def doExecute(): RDD[Row] = { val broadcastRelation = Await.result(broadcastFuture, timeout) @@ -74,3 +74,9 @@ case class BroadcastHashJoin( } } } + +object BroadcastHashJoin { + + private val broadcastHashJoinExecutionContext = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("broadcast-hash-join", 1024)) +} From ce6391296a061bc352386080a2ee96bb63fcc4ac Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Sat, 16 May 2015 20:55:10 +0800 Subject: [PATCH 210/320] [HOTFIX] [SQL] Fixes DataFrameWriter.mode(String) We forgot an assignment there. 
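A tiny stand-in for the bug class (a hypothetical builder, not the real DataFrameWriter): a Scala match is an expression, so evaluating it without assigning the result leaves the field at its default and the setter silently becomes a no-op.

class ModeBuilderSketch {
  private var mode: String = "error"

  def setModeBuggy(s: String): this.type = {
    // BUG: the match result is computed and then discarded; `mode` never changes.
    s.toLowerCase match {
      case "overwrite" | "append" | "ignore" | "error" => s.toLowerCase
      case other => sys.error(s"Unknown save mode: $other")
    }
    this
  }

  def setModeFixed(s: String): this.type = {
    // FIX: assign the match result, mirroring the one-line change below.
    this.mode = s.toLowerCase match {
      case "overwrite" | "append" | "ignore" | "error" => s.toLowerCase
      case other => sys.error(s"Unknown save mode: $other")
    }
    this
  }

  def currentMode: String = mode
}

The regression test added below exercises exactly this path through df.write.mode("overwrite").save(...).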
/cc rxin Author: Cheng Lian <lian@databricks.com> Closes #6212 from liancheng/fix-df-writer and squashes the following commits: 711fbb0 [Cheng Lian] Adds a test case 3b72d78 [Cheng Lian] Fixes DataFrameWriter.mode(String) --- .../main/scala/org/apache/spark/sql/DataFrameWriter.scala | 2 +- .../scala/org/apache/spark/sql/sources/SaveLoadSuite.scala | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index b1fc18ac3cb54..9f42f0f1f4398 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -55,7 +55,7 @@ final class DataFrameWriter private[sql](df: DataFrame) { * @since 1.4.0 */ def mode(saveMode: String): DataFrameWriter = { - saveMode.toLowerCase match { + this.mode = saveMode.toLowerCase match { case "overwrite" => SaveMode.Overwrite case "append" => SaveMode.Append case "ignore" => SaveMode.Ignore diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala index 7a28e9af3673c..274c652dd14d6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala @@ -75,6 +75,13 @@ class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { checkLoad() } + test("save with string mode and path, and load") { + conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") + path.createNewFile() + df.write.mode("overwrite").save(path.toString) + checkLoad() + } + test("save with path and datasource, and load") { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") df.write.json(path.toString) From 1b4e710e5cdb00febb4c5920d81e77c2e3966a8b Mon Sep 17 00:00:00 2001 From: Matthew Brandyberry <mbrandy@us.ibm.com> Date: Sat, 16 May 2015 18:17:48 +0100 Subject: [PATCH 211/320] [BUILD] update jblas dependency version to 1.2.4 jblas 1.2.4 includes native library support for PPC64LE. Author: Matthew Brandyberry <mbrandy@us.ibm.com> Closes #6199 from mtbrandy/jblas-1.2.4 and squashes the following commits: 9df9301 [Matthew Brandyberry] [BUILD] update jblas dependency version to 1.2.4 --- LICENSE | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE b/LICENSE index d6b9ccf07d999..9d1b00beff748 100644 --- a/LICENSE +++ b/LICENSE @@ -861,7 +861,7 @@ The following components are provided under a BSD-style license. 
See project lin (BSD 3 Clause) core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core) (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model) - (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.3 - http://jblas.org/) + (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/) (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org) (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) diff --git a/pom.xml b/pom.xml index 86aa0a9fa134c..1b45cdb67012a 100644 --- a/pom.xml +++ b/pom.xml @@ -137,7 +137,7 @@ <hive.version.short>0.13.1</hive.version.short> <derby.version>10.10.1.1</derby.version> <parquet.version>1.6.0rc3</parquet.version> - <jblas.version>1.2.3</jblas.version> + <jblas.version>1.2.4</jblas.version> <jetty.version>8.1.14.v20131031</jetty.version> <orbit.version>3.0.0.v201112011016</orbit.version> <chill.version>0.5.0</chill.version> From 161d0b4a41f453b21adde46a86e16c2743752799 Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Sat, 16 May 2015 15:03:57 -0700 Subject: [PATCH 212/320] [SPARK-7654][MLlib] Migrate MLlib to the DataFrame reader/writer API. Author: Reynold Xin <rxin@databricks.com> Closes #6211 from rxin/mllib-reader and squashes the following commits: 79a2cb9 [Reynold Xin] [SPARK-7654][MLlib] Migrate MLlib to the DataFrame reader/writer API. --- .../org/apache/spark/examples/mllib/DatasetExample.scala | 2 +- .../scala/org/apache/spark/examples/sql/RDDRelation.scala | 2 +- .../org/apache/spark/mllib/classification/NaiveBayes.scala | 4 ++-- .../mllib/classification/impl/GLMClassificationModel.scala | 2 +- .../apache/spark/mllib/clustering/GaussianMixtureModel.scala | 2 +- .../scala/org/apache/spark/mllib/clustering/KMeansModel.scala | 2 +- .../spark/mllib/clustering/PowerIterationClustering.scala | 4 ++-- .../main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 2 +- .../spark/mllib/recommendation/MatrixFactorizationModel.scala | 4 ++-- .../apache/spark/mllib/regression/IsotonicRegression.scala | 2 +- .../spark/mllib/regression/impl/GLMRegressionModel.scala | 2 +- .../org/apache/spark/mllib/tree/model/DecisionTreeModel.scala | 2 +- .../apache/spark/mllib/tree/model/treeEnsembleModels.scala | 2 +- 13 files changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala index c95cca7d656e8..520893b26d595 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala @@ -103,7 +103,7 @@ object DatasetExample { tmpDir.deleteOnExit() val outputDir = new File(tmpDir, "dataset").toString println(s"Saving to $outputDir as Parquet file.") - df.saveAsParquetFile(outputDir) + df.write.parquet(outputDir) println(s"Loading Parquet file with UDT from $outputDir.") val newDataset = sqlContext.read.parquet(outputDir) diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala index acc89199d5849..b11e32047dc34 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala @@ -58,7 
+58,7 @@ object RDDRelation { df.where($"key" === 1).orderBy($"value".asc).select($"key").collect().foreach(println) // Write out an RDD as a parquet file. - df.saveAsParquetFile("pair.parquet") + df.write.parquet("pair.parquet") // Read in parquet file. Parquet files are self-describing so the schmema is preserved. val parquetFile = sqlContext.read.parquet("pair.parquet") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index af24ab616663b..ac0ebeceaa1df 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -140,7 +140,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { // Create Parquet data. val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF() - dataRDD.saveAsParquetFile(dataPath(path)) + dataRDD.write.parquet(dataPath(path)) } def load(sc: SparkContext, path: String): NaiveBayesModel = { @@ -186,7 +186,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { // Create Parquet data. val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF() - dataRDD.saveAsParquetFile(dataPath(path)) + dataRDD.write.parquet(dataPath(path)) } def load(sc: SparkContext, path: String): NaiveBayesModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala index 3b6790cce47c6..d842ec57b2f52 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala @@ -62,7 +62,7 @@ private[classification] object GLMClassificationModel { // Create Parquet data. 
val data = Data(weights, intercept, threshold) - sc.parallelize(Seq(data), 1).toDF().saveAsParquetFile(Loader.dataPath(path)) + sc.parallelize(Seq(data), 1).toDF().write.parquet(Loader.dataPath(path)) } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala index c22862c130e77..731b43a1be574 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala @@ -126,7 +126,7 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] { val dataArray = Array.tabulate(weights.length) { i => Data(weights(i), gaussians(i).mu, gaussians(i).sigma) } - sc.parallelize(dataArray, 1).toDF().saveAsParquetFile(Loader.dataPath(path)) + sc.parallelize(dataArray, 1).toDF().write.parquet(Loader.dataPath(path)) } def load(sc: SparkContext, path: String): GaussianMixtureModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index ba228b11fcec3..252e166e85cef 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -110,7 +110,7 @@ object KMeansModel extends Loader[KMeansModel] { val dataRDD = sc.parallelize(model.clusterCenters.zipWithIndex).map { case (point, id) => Cluster(id, point) }.toDF() - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } def load(sc: SparkContext, path: String): KMeansModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala index aa53e88d59856..1ed01c9d8ba0b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala @@ -74,7 +74,7 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode sc.parallelize(Seq(metadata), 1).saveAsTextFile(Loader.metadataPath(path)) val dataRDD = model.assignments.toDF() - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } def load(sc: SparkContext, path: String): PowerIterationClusteringModel = { @@ -86,7 +86,7 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode assert(formatVersion == thisFormatVersion) val k = (metadata \ "k").extract[Int] - val assignments = sqlContext.parquetFile(Loader.dataPath(path)) + val assignments = sqlContext.read.parquet(Loader.dataPath(path)) Loader.checkSchema[PowerIterationClustering.Assignment](assignments.schema) val assignmentsRDD = assignments.map { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 98e83112f52ae..731f7576c2335 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -580,7 +580,7 @@ object Word2VecModel extends Loader[Word2VecModel] { sc.parallelize(Seq(metadata), 1).saveAsTextFile(Loader.metadataPath(path)) val dataArray = model.toSeq.map { case (w, v) => Data(w, v) } - sc.parallelize(dataArray.toSeq, 
1).toDF().saveAsParquetFile(Loader.dataPath(path)) + sc.parallelize(dataArray.toSeq, 1).toDF().write.parquet(Loader.dataPath(path)) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index 88c2148403313..b960fbc5bf5f5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -281,8 +281,8 @@ object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] { val metadata = compact(render( ("class" -> thisClassName) ~ ("version" -> thisFormatVersion) ~ ("rank" -> model.rank))) sc.parallelize(Seq(metadata), 1).saveAsTextFile(metadataPath(path)) - model.userFeatures.toDF("id", "features").saveAsParquetFile(userPath(path)) - model.productFeatures.toDF("id", "features").saveAsParquetFile(productPath(path)) + model.userFeatures.toDF("id", "features").write.parquet(userPath(path)) + model.productFeatures.toDF("id", "features").write.parquet(productPath(path)) } def load(sc: SparkContext, path: String): MatrixFactorizationModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 4ce541ae5bed9..22b9b22a871f0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -184,7 +184,7 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { sqlContext.createDataFrame( boundaries.toSeq.zip(predictions).map { case (b, p) => Data(b, p) } - ).saveAsParquetFile(dataPath(path)) + ).write.parquet(dataPath(path)) } def load(sc: SparkContext, path: String): (Array[Double], Array[Double]) = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala index b55944f74f623..2aa0e9ef96d48 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala @@ -60,7 +60,7 @@ private[regression] object GLMRegressionModel { val data = Data(weights, intercept) val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF() // TODO: repartition with 1 partition after SPARK-5532 gets fixed - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 331af428533de..a558f84c8d506 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -223,7 +223,7 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { val dataRDD: DataFrame = sc.parallelize(nodes) .map(NodeData.apply(0, _)) .toDF() - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } def load(sc: SparkContext, path: String, algo: String, numNodes: Int): DecisionTreeModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 8341219bfa71c..f9cd0140fe63f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -414,7 +414,7 @@ private[tree] object TreeEnsembleModel extends Logging { val dataRDD = sc.parallelize(model.trees.zipWithIndex).flatMap { case (tree, treeId) => tree.topNode.subtreeIterator.toSeq.map(node => NodeData(treeId, node)) }.toDF() - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } /** From 3b6ef2c5391b528ef989e24400fbb0c496c3b245 Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Sat, 16 May 2015 21:03:22 -0700 Subject: [PATCH 213/320] [SPARK-7655][Core] Deserializing value should not hold the TaskSchedulerImpl lock We should not call `DirectTaskResult.value` when holding the `TaskSchedulerImpl` lock. It may cost dozens of seconds to deserialize a large object. Author: zsxwing <zsxwing@gmail.com> Closes #6195 from zsxwing/SPARK-7655 and squashes the following commits: 21f502e [zsxwing] Add more comments e25fa88 [zsxwing] Add comments 15010b5 [zsxwing] Deserialize value should not hold the TaskSchedulerImpl lock --- .../apache/spark/scheduler/TaskResult.scala | 23 +++++++++++++++++-- .../spark/scheduler/TaskResultGetter.scala | 4 ++++ .../spark/scheduler/TaskSetManager.scala | 6 +++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala index 1f114a0207f7b..8b2a742b96988 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala @@ -40,6 +40,9 @@ class DirectTaskResult[T](var valueBytes: ByteBuffer, var accumUpdates: Map[Long var metrics: TaskMetrics) extends TaskResult[T] with Externalizable { + private var valueObjectDeserialized = false + private var valueObject: T = _ + def this() = this(null.asInstanceOf[ByteBuffer], null, null) override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { @@ -72,10 +75,26 @@ class DirectTaskResult[T](var valueBytes: ByteBuffer, var accumUpdates: Map[Long } } metrics = in.readObject().asInstanceOf[TaskMetrics] + valueObjectDeserialized = false } + /** + * When `value()` is called at the first time, it needs to deserialize `valueObject` from + * `valueBytes`. It may cost dozens of seconds for a large instance. So when calling `value` at + * the first time, the caller should avoid to block other threads. + * + * After the first time, `value()` is trivial and just returns the deserialized `valueObject`. + */ def value(): T = { - val resultSer = SparkEnv.get.serializer.newInstance() - resultSer.deserialize(valueBytes) + if (valueObjectDeserialized) { + valueObject + } else { + // This should not run when holding a lock because it may cost dozens of seconds for a large + // value. 
+ val resultSer = SparkEnv.get.serializer.newInstance() + valueObject = resultSer.deserialize(valueBytes) + valueObjectDeserialized = true + valueObject + } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index 391827c1d2156..46a6f6537e2ee 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -54,6 +54,10 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul if (!taskSetManager.canFetchMoreResults(serializedData.limit())) { return } + // deserialize "value" without holding any lock so that it won't block other threads. + // We should call it here, so that when it's called again in + // "TaskSetManager.handleSuccessfulTask", it does not need to deserialize the value. + directResult.value() (directResult, serializedData.limit()) case IndirectTaskResult(blockId, size) => if (!taskSetManager.canFetchMoreResults(size)) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 7dc325283d961..c4487d5b37247 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -620,6 +620,12 @@ private[spark] class TaskSetManager( val index = info.index info.markSuccessful() removeRunningTask(tid) + // This method is called by "TaskSchedulerImpl.handleSuccessfulTask" which holds the + // "TaskSchedulerImpl" lock until exiting. To avoid the SPARK-7655 issue, we should not + // "deserialize" the value when holding a lock to avoid blocking other threads. So we call + // "result.value()" in "TaskResultGetter.enqueueSuccessfulTask" before reaching here. + // Note: "result.value()" only deserializes the value when it's called at the first time, so + // here "result.value()" just returns the value and won't block other threads. sched.dagScheduler.taskEnded( tasks(index), Success, result.value(), result.accumUpdates, info, result.metrics) if (!successful(index)) { From 517eb37a85e0a28820bcfd5d98c50d02df6521c6 Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Sat, 16 May 2015 22:01:53 -0700 Subject: [PATCH 214/320] [SPARK-7654][SQL] Move JDBC into DataFrame's reader/writer interface. Also moved all the deprecated functions into one place for SQLContext and DataFrame, and updated tests to use the new API. Author: Reynold Xin <rxin@databricks.com> Closes #6210 from rxin/df-writer-reader-jdbc and squashes the following commits: 7465c2c [Reynold Xin] Fixed unit test. 118e609 [Reynold Xin] Updated tests. 3441b57 [Reynold Xin] Updated javadoc. 13cdd1c [Reynold Xin] [SPARK-7654][SQL] Move JDBC into DataFrame's reader/writer interface. 
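A rough usage sketch (not taken from the patch; the JDBC URL, table names and credentials are placeholders) of what the consolidated interface looks like after this change: JDBC reads and writes go through DataFrameReader/DataFrameWriter rather than the older helpers.

import java.util.Properties

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{SQLContext, SaveMode}

object JdbcReaderWriterSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("jdbc-sketch").setMaster("local[2]"))
    val sqlContext = new SQLContext(sc)

    val props = new Properties()
    props.setProperty("user", "test")        // placeholder credentials
    props.setProperty("password", "secret")

    // Read a table through the DataFrameReader interface.
    val people = sqlContext.read.jdbc("jdbc:h2:mem:testdb", "PEOPLE", props)

    // Write (append) through the DataFrameWriter interface.
    people.filter("age > 21")
      .write
      .mode(SaveMode.Append)
      .jdbc("jdbc:h2:mem:testdb", "ADULTS", props)

    sc.stop()
  }
}

The older DataFrame.createJDBCTable and insertIntoJDBC helpers are kept but deprecated in favor of write.jdbc(), as the diff below shows.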
--- .../spark/examples/sql/JavaSparkSQL.java | 4 +- .../org/apache/spark/sql/DataFrame.scala | 284 +++----- .../apache/spark/sql/DataFrameReader.scala | 89 ++- .../apache/spark/sql/DataFrameWriter.scala | 53 +- .../org/apache/spark/sql/SQLContext.scala | 682 +++++++----------- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 30 +- .../apache/spark/sql/jdbc/JDBCRelation.scala | 16 +- .../org/apache/spark/sql/jdbc/JdbcUtils.scala | 52 ++ .../org/apache/spark/sql/jdbc/jdbc.scala | 6 +- .../spark/sql/JavaApplySchemaSuite.java | 4 +- .../spark/sql/sources/JavaSaveLoadSuite.java | 10 +- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 31 +- .../spark/sql/jdbc/JDBCWriteSuite.scala | 54 +- .../hive/JavaMetastoreDataSourcesSuite.java | 20 +- .../spark/sql/hive/CachedTableSuite.scala | 4 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 73 +- .../hive/execution/HiveResolutionSuite.scala | 6 +- .../sql/hive/execution/SQLQuerySuite.scala | 8 +- .../apache/spark/sql/hive/parquetSuites.scala | 14 +- .../sql/sources/hadoopFsRelationSuites.scala | 68 +- 20 files changed, 747 insertions(+), 761 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcUtils.scala diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java index 173633ce059e3..afee279ec32b1 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java @@ -94,7 +94,7 @@ public String call(Row row) { System.out.println("=== Data source: Parquet File ==="); // DataFrames can be saved as parquet files, maintaining the schema information. - schemaPeople.saveAsParquetFile("people.parquet"); + schemaPeople.write().parquet("people.parquet"); // Read in the parquet file created above. // Parquet files are self-describing so the schema is preserved. @@ -151,7 +151,7 @@ public String call(Row row) { List<String> jsonData = Arrays.asList( "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}"); JavaRDD<String> anotherPeopleRDD = ctx.parallelize(jsonData); - DataFrame peopleFromJsonRDD = sqlContext.jsonRDD(anotherPeopleRDD.rdd()); + DataFrame peopleFromJsonRDD = sqlContext.read().json(anotherPeopleRDD.rdd()); // Take a look at the schema of this new DataFrame. 
peopleFromJsonRDD.printSchema(); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 55ef357a99f71..27e9af49f0664 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql import java.io.CharArrayWriter -import java.sql.DriverManager import java.util.Properties import scala.collection.JavaConversions._ @@ -40,9 +39,8 @@ import org.apache.spark.sql.catalyst.plans.logical.{Filter, _} import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} import org.apache.spark.sql.catalyst.{expressions, CatalystTypeConverters, ScalaReflection, SqlParser} import org.apache.spark.sql.execution.{EvaluatePython, ExplainCommand, LogicalRDD} -import org.apache.spark.sql.jdbc.JDBCWriteDetails import org.apache.spark.sql.json.JacksonGenerator -import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, ResolvedDataSource} +import org.apache.spark.sql.sources.CreateTableUsingAsSelect import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils @@ -227,10 +225,6 @@ class DataFrame private[sql]( } } - /** Left here for backward compatibility. */ - @deprecated("1.3.0", "use toDF") - def toSchemaRDD: DataFrame = this - /** * Returns the object itself. * @group basic @@ -1299,12 +1293,119 @@ class DataFrame private[sql]( @Experimental def write: DataFrameWriter = new DataFrameWriter(this) + /** + * :: Experimental :: + * Adds the rows from this RDD to the specified table, optionally overwriting the existing data. + * @group output + * @since 1.3.0 + */ + @Experimental + def insertInto(tableName: String, overwrite: Boolean): Unit = { + sqlContext.executePlan(InsertIntoTable(UnresolvedRelation(Seq(tableName)), + Map.empty, logicalPlan, overwrite, ifNotExists = false)).toRdd + } + + /** + * :: Experimental :: + * Adds the rows from this RDD to the specified table. + * Throws an exception if the table already exists. + * @group output + * @since 1.3.0 + */ + @Experimental + def insertInto(tableName: String): Unit = insertInto(tableName, overwrite = false) + + /** + * Returns the content of the [[DataFrame]] as a RDD of JSON strings. + * @group rdd + * @since 1.3.0 + */ + def toJSON: RDD[String] = { + val rowSchema = this.schema + this.mapPartitions { iter => + val writer = new CharArrayWriter() + // create the Generator without separator inserted between 2 records + val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) + + new Iterator[String] { + override def hasNext: Boolean = iter.hasNext + override def next(): String = { + JacksonGenerator(rowSchema, gen)(iter.next()) + gen.flush() + + val json = writer.toString + if (hasNext) { + writer.reset() + } else { + gen.close() + } + + json + } + } + } + } + + //////////////////////////////////////////////////////////////////////////// + // for Python API + //////////////////////////////////////////////////////////////////////////// + + /** + * Converts a JavaRDD to a PythonRDD. 
+ */ + protected[sql] def javaToPython: JavaRDD[Array[Byte]] = { + val fieldTypes = schema.fields.map(_.dataType) + val jrdd = rdd.map(EvaluatePython.rowToArray(_, fieldTypes)).toJavaRDD() + SerDeUtil.javaToPython(jrdd) + } + + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + // Deprecated methods + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + /** Left here for backward compatibility. */ + @deprecated("use toDF", "1.3.0") + def toSchemaRDD: DataFrame = this + + /** + * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`. + * This will run a `CREATE TABLE` and a bunch of `INSERT INTO` statements. + * If you pass `true` for `allowExisting`, it will drop any table with the + * given name; if you pass `false`, it will throw if the table already + * exists. + * @group output + */ + @deprecated("Use write.jdbc()", "1.4.0") + def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = { + val w = if (allowExisting) write.mode(SaveMode.Overwrite) else write + w.jdbc(url, table, new Properties) + } + + /** + * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`. + * Assumes the table already exists and has a compatible schema. If you + * pass `true` for `overwrite`, it will `TRUNCATE` the table before + * performing the `INSERT`s. + * + * The table must already exist on the database. It must have a schema + * that is compatible with the schema of this RDD; inserting the rows of + * the RDD in order via the simple statement + * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. + * @group output + */ + @deprecated("Use write.jdbc()", "1.4.0") + def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = { + val w = if (overwrite) write.mode(SaveMode.Overwrite) else write + w.jdbc(url, table, new Properties) + } + /** * Saves the contents of this [[DataFrame]] as a parquet file, preserving the schema. * Files that are written out using this method can be read back in as a [[DataFrame]] * using the `parquetFile` function in [[SQLContext]]. * @group output - * @since 1.3.0 */ @deprecated("Use write.parquet(path)", "1.4.0") def saveAsParquetFile(path: String): Unit = { @@ -1328,7 +1429,6 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output - * @since 1.3.0 */ @deprecated("Use write.saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String): Unit = { @@ -1347,7 +1447,6 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output - * @since 1.3.0 */ @deprecated("Use write.mode(mode).saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String, mode: SaveMode): Unit = { @@ -1373,7 +1472,6 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. 
* @group output - * @since 1.3.0 */ @deprecated("Use write.format(source).saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String, source: String): Unit = { @@ -1393,7 +1491,6 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output - * @since 1.3.0 */ @deprecated("Use write.format(source).mode(mode).saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = { @@ -1412,7 +1509,6 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output - * @since 1.3.0 */ @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName)", "1.4.0") @@ -1437,7 +1533,6 @@ class DataFrame private[sql]( * Also note that while this function can persist the table metadata into Hive's metastore, * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output - * @since 1.3.0 */ @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName)", "1.4.0") @@ -1454,7 +1549,6 @@ class DataFrame private[sql]( * using the default data source configured by spark.sql.sources.default and * [[SaveMode.ErrorIfExists]] as the save mode. * @group output - * @since 1.3.0 */ @deprecated("Use write.save(path)", "1.4.0") def save(path: String): Unit = { @@ -1465,7 +1559,6 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode, * using the default data source configured by spark.sql.sources.default. * @group output - * @since 1.3.0 */ @deprecated("Use write.mode(mode).save(path)", "1.4.0") def save(path: String, mode: SaveMode): Unit = { @@ -1476,7 +1569,6 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source, * using [[SaveMode.ErrorIfExists]] as the save mode. * @group output - * @since 1.3.0 */ @deprecated("Use write.format(source).save(path)", "1.4.0") def save(path: String, source: String): Unit = { @@ -1487,7 +1579,6 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame to the given path based on the given data source and * [[SaveMode]] specified by mode. * @group output - * @since 1.3.0 */ @deprecated("Use write.format(source).mode(mode).save(path)", "1.4.0") def save(path: String, source: String, mode: SaveMode): Unit = { @@ -1498,7 +1589,6 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options. * @group output - * @since 1.3.0 */ @deprecated("Use write.format(source).mode(mode).options(options).save()", "1.4.0") def save( @@ -1513,7 +1603,6 @@ class DataFrame private[sql]( * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options * @group output - * @since 1.3.0 */ @deprecated("Use write.format(source).mode(mode).options(options).save()", "1.4.0") def save( @@ -1523,163 +1612,10 @@ class DataFrame private[sql]( write.format(source).mode(mode).options(options).save() } - /** - * :: Experimental :: - * Adds the rows from this RDD to the specified table, optionally overwriting the existing data. 
- * @group output - * @since 1.3.0 - */ - @Experimental - def insertInto(tableName: String, overwrite: Boolean): Unit = { - sqlContext.executePlan(InsertIntoTable(UnresolvedRelation(Seq(tableName)), - Map.empty, logicalPlan, overwrite, ifNotExists = false)).toRdd - } - - /** - * :: Experimental :: - * Adds the rows from this RDD to the specified table. - * Throws an exception if the table already exists. - * @group output - * @since 1.3.0 - */ - @Experimental - def insertInto(tableName: String): Unit = insertInto(tableName, overwrite = false) - - /** - * Returns the content of the [[DataFrame]] as a RDD of JSON strings. - * @group rdd - * @since 1.3.0 - */ - def toJSON: RDD[String] = { - val rowSchema = this.schema - this.mapPartitions { iter => - val writer = new CharArrayWriter() - // create the Generator without separator inserted between 2 records - val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) - - new Iterator[String] { - override def hasNext: Boolean = iter.hasNext - override def next(): String = { - JacksonGenerator(rowSchema, gen)(iter.next()) - gen.flush() - - val json = writer.toString - if (hasNext) { - writer.reset() - } else { - gen.close() - } - - json - } - } - } - } - //////////////////////////////////////////////////////////////////////////// - // JDBC Write Support //////////////////////////////////////////////////////////////////////////// - - /** - * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`. - * This will run a `CREATE TABLE` and a bunch of `INSERT INTO` statements. - * If you pass `true` for `allowExisting`, it will drop any table with the - * given name; if you pass `false`, it will throw if the table already - * exists. - * @group output - * @since 1.3.0 - */ - def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = { - createJDBCTable(url, table, allowExisting, new Properties()) - } - - /** - * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table` - * using connection properties defined in `properties`. - * This will run a `CREATE TABLE` and a bunch of `INSERT INTO` statements. - * If you pass `true` for `allowExisting`, it will drop any table with the - * given name; if you pass `false`, it will throw if the table already - * exists. - * @group output - * @since 1.4.0 - */ - def createJDBCTable( - url: String, - table: String, - allowExisting: Boolean, - properties: Properties): Unit = { - val conn = DriverManager.getConnection(url, properties) - try { - if (allowExisting) { - val sql = s"DROP TABLE IF EXISTS $table" - conn.prepareStatement(sql).executeUpdate() - } - val schema = JDBCWriteDetails.schemaString(this, url) - val sql = s"CREATE TABLE $table ($schema)" - conn.prepareStatement(sql).executeUpdate() - } finally { - conn.close() - } - JDBCWriteDetails.saveTable(this, url, table, properties) - } - - /** - * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`. - * Assumes the table already exists and has a compatible schema. If you - * pass `true` for `overwrite`, it will `TRUNCATE` the table before - * performing the `INSERT`s. - * - * The table must already exist on the database. It must have a schema - * that is compatible with the schema of this RDD; inserting the rows of - * the RDD in order via the simple statement - * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. 
- * @group output - * @since 1.3.0 - */ - def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = { - insertIntoJDBC(url, table, overwrite, new Properties()) - } - - /** - * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table` - * using connection properties defined in `properties`. - * Assumes the table already exists and has a compatible schema. If you - * pass `true` for `overwrite`, it will `TRUNCATE` the table before - * performing the `INSERT`s. - * - * The table must already exist on the database. It must have a schema - * that is compatible with the schema of this RDD; inserting the rows of - * the RDD in order via the simple statement - * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. - * @group output - * @since 1.4.0 - */ - def insertIntoJDBC( - url: String, - table: String, - overwrite: Boolean, - properties: Properties): Unit = { - if (overwrite) { - val conn = DriverManager.getConnection(url, properties) - try { - val sql = s"TRUNCATE TABLE $table" - conn.prepareStatement(sql).executeUpdate() - } finally { - conn.close() - } - } - JDBCWriteDetails.saveTable(this, url, table, properties) - } + // End of deprecated methods //////////////////////////////////////////////////////////////////////////// - // for Python API //////////////////////////////////////////////////////////////////////////// - /** - * Converts a JavaRDD to a PythonRDD. - */ - protected[sql] def javaToPython: JavaRDD[Array[Byte]] = { - val fieldTypes = schema.fields.map(_.dataType) - val jrdd = rdd.map(EvaluatePython.rowToArray(_, fieldTypes)).toJavaRDD() - SerDeUtil.javaToPython(jrdd) - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 4d63faad6fb7c..381c10f48f3c3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -17,12 +17,16 @@ package org.apache.spark.sql +import java.util.Properties + import org.apache.hadoop.fs.Path +import org.apache.spark.Partition import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaRDD import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD +import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} import org.apache.spark.sql.json.{JsonRDD, JSONRelation} import org.apache.spark.sql.parquet.ParquetRelation2 import org.apache.spark.sql.sources.{LogicalRelation, ResolvedDataSource} @@ -31,7 +35,7 @@ import org.apache.spark.sql.types.StructType /** * :: Experimental :: * Interface used to load a [[DataFrame]] from external storage systems (e.g. file systems, - * key-value stores, etc). + * key-value stores, etc). Use [[SQLContext.read]] to access this. * * @since 1.4.0 */ @@ -94,6 +98,8 @@ class DataFrameReader private[sql](sqlContext: SQLContext) { * Specifies the input partitioning. If specified, the underlying data source does not need to * discover the data partitioning scheme, and thus can speed up very large inputs. * + * This is only applicable for Parquet at the moment. + * * @since 1.4.0 */ @scala.annotation.varargs @@ -128,6 +134,87 @@ class DataFrameReader private[sql](sqlContext: SQLContext) { DataFrame(sqlContext, LogicalRelation(resolved.relation)) } + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table and connection properties.
+ * + * @since 1.4.0 + */ + def jdbc(url: String, table: String, properties: Properties): DataFrame = { + jdbc(url, table, JDBCRelation.columnPartition(null), properties) + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table. Partitions of the table will be retrieved in parallel based on the parameters + * passed to this function. + * + * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash + * your external database systems. + * + * @param url JDBC database url of the form `jdbc:subprotocol:subname` + * @param table Name of the table in the external database. + * @param columnName the name of a column of integral type that will be used for partitioning. + * @param lowerBound the minimum value of `columnName` used to decide partition stride + * @param upperBound the maximum value of `columnName` used to decide partition stride + * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split + * evenly into this many partitions + * @param connectionProperties JDBC database connection arguments, a list of arbitrary string + * tag/value. Normally at least a "user" and "password" property + * should be included. + * + * @since 1.4.0 + */ + def jdbc( + url: String, + table: String, + columnName: String, + lowerBound: Long, + upperBound: Long, + numPartitions: Int, + connectionProperties: Properties): DataFrame = { + val partitioning = JDBCPartitioningInfo(columnName, lowerBound, upperBound, numPartitions) + val parts = JDBCRelation.columnPartition(partitioning) + jdbc(url, table, parts, connectionProperties) + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table using connection properties. The `predicates` parameter gives a list + * expressions suitable for inclusion in WHERE clauses; each one defines one partition + * of the [[DataFrame]]. + * + * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash + * your external database systems. + * + * @param url JDBC database url of the form `jdbc:subprotocol:subname` + * @param table Name of the table in the external database. + * @param predicates Condition in the where clause for each partition. + * @param connectionProperties JDBC database connection arguments, a list of arbitrary string + * tag/value. Normally at least a "user" and "password" property + * should be included. + * @since 1.4.0 + */ + def jdbc( + url: String, + table: String, + predicates: Array[String], + connectionProperties: Properties): DataFrame = { + val parts: Array[Partition] = predicates.zipWithIndex.map { case (part, i) => + JDBCPartition(part, i) : Partition + } + jdbc(url, table, parts, connectionProperties) + } + + private def jdbc( + url: String, + table: String, + parts: Array[Partition], + connectionProperties: Properties): DataFrame = { + val relation = JDBCRelation(url, table, parts, connectionProperties)(sqlContext) + sqlContext.baseRelationToDataFrame(relation) + } + /** * Loads a JSON file (one object per line) and returns the result as a [[DataFrame]]. 
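A usage sketch of the jdbc() reader overloads added above; this is illustrative only and not part of the patch. The url, table, and column names are placeholders and assume an H2 driver on the classpath:

import java.util.Properties
import org.apache.spark.sql.{DataFrame, SQLContext}

// Placeholder url, table, and partitioning column.
def readViaJdbc(sqlContext: SQLContext): Seq[DataFrame] = {
  val url = "jdbc:h2:mem:testdb;user=sa"
  val props = new Properties()

  Seq(
    // Whole table in a single partition.
    sqlContext.read.jdbc(url, "TEST.PEOPLE", props),
    // Four range partitions over the integral column THEID in [0, 100].
    sqlContext.read.jdbc(url, "TEST.PEOPLE", "THEID", 0L, 100L, 4, props),
    // One partition per WHERE-clause predicate.
    sqlContext.read.jdbc(url, "TEST.PEOPLE", Array("THEID < 2", "THEID >= 2"), props))
}

The column-based overload derives one WHERE clause per stride of the partitioning column, while the predicate-based overload creates exactly one partition per supplied clause.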
* diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 9f42f0f1f4398..f2e721d4db271 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -17,14 +17,17 @@ package org.apache.spark.sql +import java.util.Properties + import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.jdbc.{JDBCWriteDetails, JdbcUtils} import org.apache.spark.sql.sources.{ResolvedDataSource, CreateTableUsingAsSelect} /** * :: Experimental :: * Interface used to write a [[DataFrame]] to external storage systems (e.g. file systems, - * key-value stores, etc). + * key-value stores, etc). Use [[DataFrame.write]] to access this. * * @since 1.4.0 */ @@ -110,6 +113,8 @@ final class DataFrameWriter private[sql](df: DataFrame) { * Partitions the output by the given columns on the file system. If specified, the output is * laid out on the file system similar to Hive's partitioning scheme. * + * This is only applicable for Parquet at the moment. + * * @since 1.4.0 */ @scala.annotation.varargs @@ -161,6 +166,52 @@ final class DataFrameWriter private[sql](df: DataFrame) { df.sqlContext.executePlan(cmd).toRdd } + /** + * Saves the content of the [[DataFrame]] to an external database table via JDBC. In the case the + * table already exists in the external database, behavior of this function depends on the + * save mode, specified by the `mode` function (defaults to throwing an exception). + * + * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash + * your external database systems. + * + * @param url JDBC database url of the form `jdbc:subprotocol:subname` + * @param table Name of the table in the external database. + * @param connectionProperties JDBC database connection arguments, a list of arbitrary string + * tag/value. Normally at least a "user" and "password" property + * should be included. + */ + def jdbc(url: String, table: String, connectionProperties: Properties): Unit = { + val conn = JdbcUtils.createConnection(url, connectionProperties) + + try { + var tableExists = JdbcUtils.tableExists(conn, table) + + if (mode == SaveMode.Ignore && tableExists) { + return + } + + if (mode == SaveMode.ErrorIfExists && tableExists) { + sys.error(s"Table $table already exists.") + } + + if (mode == SaveMode.Overwrite && tableExists) { + JdbcUtils.dropTable(conn, table) + tableExists = false + } + + // Create the table if the table didn't exist. + if (!tableExists) { + val schema = JDBCWriteDetails.schemaString(df, url) + val sql = s"CREATE TABLE $table ($schema)" + conn.prepareStatement(sql).executeUpdate() + } + } finally { + conn.close() + } + + JDBCWriteDetails.saveTable(df, url, table, connectionProperties) + } + /** * Saves the content of the [[DataFrame]] in JSON format at the specified path.
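The behavior of the new writer method above is driven by the current save mode when the target table already exists: ErrorIfExists (the default) fails, Ignore returns without writing, Overwrite drops and recreates the table before inserting, and Append inserts into the existing table. A hedged sketch with placeholder names, not part of the patch:

import java.util.Properties
import org.apache.spark.sql.{DataFrame, SaveMode}

// Placeholder names; each call assumes TEST.PEOPLE already exists in the target database.
def saveModes(df: DataFrame, url: String, props: Properties): Unit = {
  df.write.mode(SaveMode.Ignore).jdbc(url, "TEST.PEOPLE", props)    // leaves the table untouched
  df.write.mode(SaveMode.Append).jdbc(url, "TEST.PEOPLE", props)    // keeps the table, inserts rows
  df.write.mode(SaveMode.Overwrite).jdbc(url, "TEST.PEOPLE", props) // drops, recreates, then inserts
  // SaveMode.ErrorIfExists (the default) would fail here because the table already exists.
}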
* This is equivalent to: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 34a50e522c4ca..ac1a800219423 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -28,6 +28,7 @@ import scala.util.control.NonFatal import com.google.common.reflect.TypeToken +import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.rdd.RDD @@ -40,11 +41,9 @@ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.ParserDialect import org.apache.spark.sql.execution.{Filter, _} -import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils -import org.apache.spark.{Partition, SparkContext} /** * The entry point for working with structured data (rows and columns) in Spark. Allows the @@ -531,67 +530,6 @@ class SQLContext(@transient val sparkContext: SparkContext) createDataFrame(rdd.rdd, beanClass) } - /** - * :: DeveloperApi :: - * Creates a [[DataFrame]] from an [[RDD]] containing [[Row]]s by applying a schema to this RDD. - * It is important to make sure that the structure of every [[Row]] of the provided RDD matches - * the provided schema. Otherwise, there will be runtime exception. - * Example: - * {{{ - * import org.apache.spark.sql._ - * import org.apache.spark.sql.types._ - * val sqlContext = new org.apache.spark.sql.SQLContext(sc) - * - * val schema = - * StructType( - * StructField("name", StringType, false) :: - * StructField("age", IntegerType, true) :: Nil) - * - * val people = - * sc.textFile("examples/src/main/resources/people.txt").map( - * _.split(",")).map(p => Row(p(0), p(1).trim.toInt)) - * val dataFrame = sqlContext. applySchema(people, schema) - * dataFrame.printSchema - * // root - * // |-- name: string (nullable = false) - * // |-- age: integer (nullable = true) - * - * dataFrame.registerTempTable("people") - * sqlContext.sql("select name from people").collect.foreach(println) - * }}} - */ - @deprecated("use createDataFrame", "1.3.0") - def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = { - createDataFrame(rowRDD, schema) - } - - @deprecated("use createDataFrame", "1.3.0") - def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { - createDataFrame(rowRDD, schema) - } - - /** - * Applies a schema to an RDD of Java Beans. - * - * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, - * SELECT * queries will return the columns in an undefined order. - */ - @deprecated("use createDataFrame", "1.3.0") - def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = { - createDataFrame(rdd, beanClass) - } - - /** - * Applies a schema to an RDD of Java Beans. - * - * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, - * SELECT * queries will return the columns in an undefined order. - */ - @deprecated("use createDataFrame", "1.3.0") - def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { - createDataFrame(rdd, beanClass) - } - /** * :: Experimental :: * Returns a [[DataFrameReader]] that can be used to read data in as a [[DataFrame]]. 
@@ -606,205 +544,6 @@ class SQLContext(@transient val sparkContext: SparkContext) @Experimental def read: DataFrameReader = new DataFrameReader(this) - /** - * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty - * [[DataFrame]] if no paths are passed in. - * - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.parquet()", "1.4.0") - @scala.annotation.varargs - def parquetFile(paths: String*): DataFrame = { - if (paths.isEmpty) { - emptyDataFrame - } else if (conf.parquetUseDataSourceApi) { - read.parquet(paths : _*) - } else { - DataFrame(this, parquet.ParquetRelation( - paths.mkString(","), Some(sparkContext.hadoopConfiguration), this)) - } - } - - /** - * Loads a JSON file (one object per line), returning the result as a [[DataFrame]]. - * It goes through the entire dataset once to determine the schema. - * - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.json()", "1.4.0") - def jsonFile(path: String): DataFrame = { - read.json(path) - } - - /** - * Loads a JSON file (one object per line) and applies the given schema, - * returning the result as a [[DataFrame]]. - * - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.json()", "1.4.0") - def jsonFile(path: String, schema: StructType): DataFrame = { - read.schema(schema).json(path) - } - - /** - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.json()", "1.4.0") - def jsonFile(path: String, samplingRatio: Double): DataFrame = { - read.option("samplingRatio", samplingRatio.toString).json(path) - } - - /** - * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a - * [[DataFrame]]. - * It goes through the entire dataset once to determine the schema. - * - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.json()", "1.4.0") - def jsonRDD(json: RDD[String]): DataFrame = read.json(json) - - /** - * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a - * [[DataFrame]]. - * It goes through the entire dataset once to determine the schema. - * - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.json()", "1.4.0") - def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json) - - /** - * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, - * returning the result as a [[DataFrame]]. - * - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.json()", "1.4.0") - def jsonRDD(json: RDD[String], schema: StructType): DataFrame = { - read.schema(schema).json(json) - } - - /** - * Loads an JavaRDD<String> storing JSON objects (one object per record) and applies the given - * schema, returning the result as a [[DataFrame]]. - * - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.json()", "1.4.0") - def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = { - read.schema(schema).json(json) - } - - /** - * Loads an RDD[String] storing JSON objects (one object per record) inferring the - * schema, returning the result as a [[DataFrame]]. - * - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.json()", "1.4.0") - def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = { - read.option("samplingRatio", samplingRatio.toString).json(json) - } - - /** - * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the - * schema, returning the result as a [[DataFrame]]. 
- * - * @group specificdata - * @since 1.3.0 - */ - @deprecated("Use read.json()", "1.4.0") - def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = { - read.option("samplingRatio", samplingRatio.toString).json(json) - } - - /** - * Returns the dataset stored at path as a DataFrame, - * using the default data source configured by spark.sql.sources.default. - * - * @group genericdata - * @since 1.3.0 - */ - @deprecated("Use read.load(path)", "1.4.0") - def load(path: String): DataFrame = { - read.load(path) - } - - /** - * Returns the dataset stored at path as a DataFrame, using the given data source. - * - * @group genericdata - * @since 1.3.0 - */ - @deprecated("Use read.format(source).load(path)", "1.4.0") - def load(path: String, source: String): DataFrame = { - read.format(source).load(path) - } - - /** - * (Java-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame. - * - * @group genericdata - * @since 1.3.0 - */ - @deprecated("Use read.format(source).options(options).load()", "1.4.0") - def load(source: String, options: java.util.Map[String, String]): DataFrame = { - read.options(options).format(source).load() - } - - /** - * (Scala-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame. - * - * @group genericdata - * @since 1.3.0 - */ - @deprecated("Use read.format(source).options(options).load()", "1.4.0") - def load(source: String, options: Map[String, String]): DataFrame = { - read.options(options).format(source).load() - } - - /** - * (Java-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. - * - * @group genericdata - * @since 1.3.0 - */ - @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0") - def load( - source: String, - schema: StructType, - options: java.util.Map[String, String]): DataFrame = { - read.format(source).schema(schema).options(options).load() - } - - /** - * (Scala-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. - * @group genericdata - * @since 1.3.0 - */ - @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0") - def load( - source: String, - schema: StructType, - options: Map[String, String]): DataFrame = { - read.format(source).schema(schema).options(options).load() - } - /** * :: Experimental :: * Creates an external table from the given path and returns the corresponding DataFrame. @@ -903,150 +642,24 @@ class SQLContext(@transient val sparkContext: SparkContext) * * @group ddl_ops * @since 1.3.0 - */ - @Experimental - def createExternalTable( - tableName: String, - source: String, - schema: StructType, - options: Map[String, String]): DataFrame = { - val cmd = - CreateTableUsing( - tableName, - userSpecifiedSchema = Some(schema), - source, - temporary = false, - options, - allowExisting = false, - managedIfNoPath = false) - executePlan(cmd).toRdd - table(tableName) - } - - /** - * :: Experimental :: - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table. 
- * - * @group specificdata - * @since 1.3.0 - */ - @Experimental - def jdbc(url: String, table: String): DataFrame = { - jdbc(url, table, JDBCRelation.columnPartition(null), new Properties()) - } - - /** - * :: Experimental :: - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table and connection properties. - * - * @group specificdata - * @since 1.4.0 - */ - @Experimental - def jdbc(url: String, table: String, properties: Properties): DataFrame = { - jdbc(url, table, JDBCRelation.columnPartition(null), properties) - } - - /** - * :: Experimental :: - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table. Partitions of the table will be retrieved in parallel based on the parameters - * passed to this function. - * - * @param columnName the name of a column of integral type that will be used for partitioning. - * @param lowerBound the minimum value of `columnName` used to decide partition stride - * @param upperBound the maximum value of `columnName` used to decide partition stride - * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split - * evenly into this many partitions - * @group specificdata - * @since 1.3.0 - */ - @Experimental - def jdbc( - url: String, - table: String, - columnName: String, - lowerBound: Long, - upperBound: Long, - numPartitions: Int): DataFrame = { - jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties()) - } - - /** - * :: Experimental :: - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table. Partitions of the table will be retrieved in parallel based on the parameters - * passed to this function. - * - * @param columnName the name of a column of integral type that will be used for partitioning. - * @param lowerBound the minimum value of `columnName` used to decide partition stride - * @param upperBound the maximum value of `columnName` used to decide partition stride - * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split - * evenly into this many partitions - * @param properties connection properties - * @group specificdata - * @since 1.4.0 - */ - @Experimental - def jdbc( - url: String, - table: String, - columnName: String, - lowerBound: Long, - upperBound: Long, - numPartitions: Int, - properties: Properties): DataFrame = { - val partitioning = JDBCPartitioningInfo(columnName, lowerBound, upperBound, numPartitions) - val parts = JDBCRelation.columnPartition(partitioning) - jdbc(url, table, parts, properties) - } - - /** - * :: Experimental :: - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table. The theParts parameter gives a list expressions - * suitable for inclusion in WHERE clauses; each one defines one partition - * of the [[DataFrame]]. - * - * @group specificdata - * @since 1.3.0 - */ - @Experimental - def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { - jdbc(url, table, theParts, new Properties()) - } - - /** - * :: Experimental :: - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table using connection properties. The theParts parameter gives a list expressions - * suitable for inclusion in WHERE clauses; each one defines one partition - * of the [[DataFrame]]. 
- * - * @group specificdata - * @since 1.4.0 - */ - @Experimental - def jdbc( - url: String, - table: String, - theParts: Array[String], - properties: Properties): DataFrame = { - val parts: Array[Partition] = theParts.zipWithIndex.map { case (part, i) => - JDBCPartition(part, i) : Partition - } - jdbc(url, table, parts, properties) - } - - private def jdbc( - url: String, - table: String, - parts: Array[Partition], - properties: Properties): DataFrame = { - val relation = JDBCRelation(url, table, parts, properties)(this) - baseRelationToDataFrame(relation) + */ + @Experimental + def createExternalTable( + tableName: String, + source: String, + schema: StructType, + options: Map[String, String]): DataFrame = { + val cmd = + CreateTableUsing( + tableName, + userSpecifiedSchema = Some(schema), + source, + temporary = false, + options, + allowExisting = false, + managedIfNoPath = false) + executePlan(cmd).toRdd + table(tableName) } /** @@ -1372,6 +985,263 @@ class SQLContext(@transient val sparkContext: SparkContext) } } + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + // Deprecated methods + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + @deprecated("use createDataFrame", "1.3.0") + def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = { + createDataFrame(rowRDD, schema) + } + + @deprecated("use createDataFrame", "1.3.0") + def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { + createDataFrame(rowRDD, schema) + } + + @deprecated("use createDataFrame", "1.3.0") + def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = { + createDataFrame(rdd, beanClass) + } + + @deprecated("use createDataFrame", "1.3.0") + def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { + createDataFrame(rdd, beanClass) + } + + /** + * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty + * [[DataFrame]] if no paths are passed in. + * + * @group specificdata + */ + @deprecated("Use read.parquet()", "1.4.0") + @scala.annotation.varargs + def parquetFile(paths: String*): DataFrame = { + if (paths.isEmpty) { + emptyDataFrame + } else if (conf.parquetUseDataSourceApi) { + read.parquet(paths : _*) + } else { + DataFrame(this, parquet.ParquetRelation( + paths.mkString(","), Some(sparkContext.hadoopConfiguration), this)) + } + } + + /** + * Loads a JSON file (one object per line), returning the result as a [[DataFrame]]. + * It goes through the entire dataset once to determine the schema. + * + * @group specificdata + */ + @deprecated("Use read.json()", "1.4.0") + def jsonFile(path: String): DataFrame = { + read.json(path) + } + + /** + * Loads a JSON file (one object per line) and applies the given schema, + * returning the result as a [[DataFrame]]. + * + * @group specificdata + */ + @deprecated("Use read.json()", "1.4.0") + def jsonFile(path: String, schema: StructType): DataFrame = { + read.schema(schema).json(path) + } + + /** + * @group specificdata + */ + @deprecated("Use read.json()", "1.4.0") + def jsonFile(path: String, samplingRatio: Double): DataFrame = { + read.option("samplingRatio", samplingRatio.toString).json(path) + } + + /** + * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a + * [[DataFrame]]. 
+ * It goes through the entire dataset once to determine the schema. + * + * @group specificdata + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: RDD[String]): DataFrame = read.json(json) + + /** + * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a + * [[DataFrame]]. + * It goes through the entire dataset once to determine the schema. + * + * @group specificdata + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json) + + /** + * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, + * returning the result as a [[DataFrame]]. + * + * @group specificdata + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: RDD[String], schema: StructType): DataFrame = { + read.schema(schema).json(json) + } + + /** + * Loads an JavaRDD<String> storing JSON objects (one object per record) and applies the given + * schema, returning the result as a [[DataFrame]]. + * + * @group specificdata + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = { + read.schema(schema).json(json) + } + + /** + * Loads an RDD[String] storing JSON objects (one object per record) inferring the + * schema, returning the result as a [[DataFrame]]. + * + * @group specificdata + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = { + read.option("samplingRatio", samplingRatio.toString).json(json) + } + + /** + * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the + * schema, returning the result as a [[DataFrame]]. + * + * @group specificdata + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = { + read.option("samplingRatio", samplingRatio.toString).json(json) + } + + /** + * Returns the dataset stored at path as a DataFrame, + * using the default data source configured by spark.sql.sources.default. + * + * @group genericdata + */ + @deprecated("Use read.load(path)", "1.4.0") + def load(path: String): DataFrame = { + read.load(path) + } + + /** + * Returns the dataset stored at path as a DataFrame, using the given data source. + * + * @group genericdata + */ + @deprecated("Use read.format(source).load(path)", "1.4.0") + def load(path: String, source: String): DataFrame = { + read.format(source).load(path) + } + + /** + * (Java-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame. + * + * @group genericdata + */ + @deprecated("Use read.format(source).options(options).load()", "1.4.0") + def load(source: String, options: java.util.Map[String, String]): DataFrame = { + read.options(options).format(source).load() + } + + /** + * (Scala-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame. + * + * @group genericdata + */ + @deprecated("Use read.format(source).options(options).load()", "1.4.0") + def load(source: String, options: Map[String, String]): DataFrame = { + read.options(options).format(source).load() + } + + /** + * (Java-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. 
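The deprecated json and load entry points being relocated here map directly onto the DataFrameReader API. An illustrative sketch, not part of the patch; the path and sample record are placeholders:

import org.apache.spark.sql.{DataFrame, SQLContext}

// Placeholder path and sample JSON record.
def migrateReads(sqlContext: SQLContext): Seq[DataFrame] = {
  // 1.3 style: sqlContext.jsonRDD(rdd)
  val rdd = sqlContext.sparkContext.parallelize(Seq("""{"a": 1, "b": "str1"}"""))
  val fromRdd = sqlContext.read.json(rdd)

  // 1.3 style: sqlContext.load("/tmp/events.json", "json")
  val fromPath = sqlContext.read.format("json").load("/tmp/events.json")

  Seq(fromRdd, fromPath)
}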
+ * + * @group genericdata + */ + @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0") + def load(source: String, schema: StructType, options: java.util.Map[String, String]): DataFrame = + { + read.format(source).schema(schema).options(options).load() + } + + /** + * (Scala-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. + * + * @group genericdata + */ + @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0") + def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = { + read.format(source).schema(schema).options(options).load() + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table. + * + * @group specificdata + */ + @deprecated("use read.jdbc()", "1.4.0") + def jdbc(url: String, table: String): DataFrame = { + read.jdbc(url, table, new Properties) + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table. Partitions of the table will be retrieved in parallel based on the parameters + * passed to this function. + * + * @param columnName the name of a column of integral type that will be used for partitioning. + * @param lowerBound the minimum value of `columnName` used to decide partition stride + * @param upperBound the maximum value of `columnName` used to decide partition stride + * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split + * evenly into this many partitions + * @group specificdata + */ + @deprecated("use read.jdbc()", "1.4.0") + def jdbc( + url: String, + table: String, + columnName: String, + lowerBound: Long, + upperBound: Long, + numPartitions: Int): DataFrame = { + read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties) + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table. The theParts parameter gives a list of expressions + * suitable for inclusion in WHERE clauses; each one defines one partition + * of the [[DataFrame]]. + * + * @group specificdata + */ + @deprecated("use read.jdbc()", "1.4.0") + def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { + read.jdbc(url, table, theParts, new Properties) + } + + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + // End of deprecated methods + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 40483d3ec7701..95935ba874a72 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -29,7 +29,16 @@ import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ import org.apache.spark.sql.sources._ +/** + * Data corresponding to one partition of a JDBCRDD. + */ +private[sql] case class JDBCPartition(whereClause: String, idx: Int) extends Partition { + override def index: Int = idx +} + + private[sql] object JDBCRDD extends Logging { + /** * Maps a JDBC type to a Catalyst type.
This function is called only when * the DriverQuirks class corresponding to your database driver returns null. @@ -168,6 +177,7 @@ private[sql] object JDBCRDD extends Logging { DriverManager.getConnection(url, properties) } } + /** * Build and return JDBCRDD from the given information. * @@ -193,18 +203,14 @@ private[sql] object JDBCRDD extends Logging { requiredColumns: Array[String], filters: Array[Filter], parts: Array[Partition]): RDD[Row] = { - - val prunedSchema = pruneSchema(schema, requiredColumns) - - return new - JDBCRDD( - sc, - getConnector(driver, url, properties), - prunedSchema, - fqTable, - requiredColumns, - filters, - parts) + new JDBCRDD( + sc, + getConnector(driver, url, properties), + pruneSchema(schema, requiredColumns), + fqTable, + requiredColumns, + filters, + parts) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala index 93e82549f213b..09d6865457df6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala @@ -17,26 +17,16 @@ package org.apache.spark.sql.jdbc -import java.sql.DriverManager import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.spark.Partition import org.apache.spark.rdd.RDD -import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.{SaveMode, DataFrame, SQLContext} import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.StructType -import org.apache.spark.util.Utils - -/** - * Data corresponding to one partition of a JDBCRDD. - */ -private[sql] case class JDBCPartition(whereClause: String, idx: Int) extends Partition { - override def index: Int = idx -} /** * Instructions on how to partition the table among workers. @@ -152,6 +142,8 @@ private[sql] case class JDBCRelation( } override def insert(data: DataFrame, overwrite: Boolean): Unit = { - data.insertIntoJDBC(url, table, overwrite, properties) + data.write + .mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append) + .jdbc(url, table, properties) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcUtils.scala new file mode 100644 index 0000000000000..cc918c237192b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcUtils.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc + +import java.sql.{Connection, DriverManager} +import java.util.Properties + +import scala.util.Try + +/** + * Util functions for JDBC tables. 
+ */ +private[sql] object JdbcUtils { + + /** + * Establishes a JDBC connection. + */ + def createConnection(url: String, connectionProperties: Properties): Connection = { + DriverManager.getConnection(url, connectionProperties) + } + + /** + * Returns true if the table already exists in the JDBC database. + */ + def tableExists(conn: Connection, table: String): Boolean = { + // Somewhat hacky, but there isn't a good way to identify whether a table exists for all + // SQL database systems, considering "table" could also include the database name. + Try(conn.prepareStatement(s"SELECT 1 FROM $table LIMIT 1").executeQuery().next()).isSuccess + } + + /** + * Drops a table from the JDBC database. + */ + def dropTable(conn: Connection, table: String): Unit = { + conn.prepareStatement(s"DROP TABLE $table").executeUpdate() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala index c099881a01226..a61790b8472c8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala @@ -163,8 +163,8 @@ package object jdbc { table: String, properties: Properties = new Properties()) { val quirks = DriverQuirks.get(url) - var nullTypes: Array[Int] = df.schema.fields.map(field => { - var nullType: Option[Int] = quirks.getJDBCType(field.dataType)._2 + val nullTypes: Array[Int] = df.schema.fields.map { field => + val nullType: Option[Int] = quirks.getJDBCType(field.dataType)._2 if (nullType.isEmpty) { field.dataType match { case IntegerType => java.sql.Types.INTEGER @@ -183,7 +183,7 @@ package object jdbc { s"Can't translate null value for field $field") } } else nullType.get - }).toArray + } val rddSchema = df.schema df.foreachPartition { iterator => diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java index c344a9b095c52..fcb8f5499cf84 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java @@ -187,14 +187,14 @@ public void applySchemaToJSON() { null, "this is another simple string.")); - DataFrame df1 = sqlContext.jsonRDD(jsonRDD); + DataFrame df1 = sqlContext.read().json(jsonRDD); StructType actualSchema1 = df1.schema(); Assert.assertEquals(expectedSchema, actualSchema1); df1.registerTempTable("jsonTable1"); List<Row> actual1 = sqlContext.sql("select * from jsonTable1").collectAsList(); Assert.assertEquals(expectedResult, actual1); - DataFrame df2 = sqlContext.jsonRDD(jsonRDD, expectedSchema); + DataFrame df2 = sqlContext.read().schema(expectedSchema).json(jsonRDD); StructType actualSchema2 = df2.schema(); Assert.assertEquals(expectedSchema, actualSchema2); df2.registerTempTable("jsonTable2"); diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java index 6a0bcefe7aa88..2706e01bd28af 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java @@ -67,7 +67,7 @@ public void setUp() throws IOException { jsonObjects.add("{\"a\":" + i + ", \"b\":\"str" + i + "\"}"); } JavaRDD<String> rdd = sc.parallelize(jsonObjects); - df = sqlContext.jsonRDD(rdd); + df = sqlContext.read().json(rdd); 
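JdbcUtils.tableExists above deliberately avoids JDBC metadata and instead probes with a cheap single-row query. A standalone sketch of the same probe using plain JDBC, not part of the patch; the url and table are placeholders and assume an H2 (or other JDBC) driver on the classpath:

import java.sql.{Connection, DriverManager}
import java.util.Properties
import scala.util.Try

// Returns true if a trivial SELECT against the table succeeds, mirroring the probe above.
def probeTable(url: String, table: String): Boolean = {
  val conn: Connection = DriverManager.getConnection(url, new Properties())
  try {
    Try(conn.prepareStatement(s"SELECT 1 FROM $table LIMIT 1").executeQuery().next()).isSuccess
  } finally {
    conn.close()
  }
}

// e.g. probeTable("jdbc:h2:mem:testdb;user=sa", "TEST.PEOPLE")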
df.registerTempTable("jsonTable"); } @@ -75,10 +75,8 @@ public void setUp() throws IOException { public void saveAndLoad() { Map<String, String> options = new HashMap<String, String>(); options.put("path", path.toString()); - df.save("json", SaveMode.ErrorIfExists, options); - + df.write().mode(SaveMode.ErrorIfExists).format("json").options(options).save(); DataFrame loadedDF = sqlContext.read().format("json").options(options).load(); - checkAnswer(loadedDF, df.collectAsList()); } @@ -86,12 +84,12 @@ public void saveAndLoad() { public void saveAndLoadWithSchema() { Map<String, String> options = new HashMap<String, String>(); options.put("path", path.toString()); - df.save("json", SaveMode.ErrorIfExists, options); + df.write().format("json").mode(SaveMode.ErrorIfExists).options(options).save(); List<StructField> fields = new ArrayList<StructField>(); fields.add(DataTypes.createStructField("b", DataTypes.StringType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame loadedDF = sqlContext.load("json", schema, options); + DataFrame loadedDF = sqlContext.read().format("json").schema(schema).options(options).load(); checkAnswer(loadedDF, sqlContext.sql("SELECT b FROM jsonTable").collectAsList()); } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 2abfe7f167f77..5a7b6f0aac6f7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -221,22 +221,25 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { } test("Basic API") { - assert(TestSQLContext.jdbc(urlWithUserAndPass, "TEST.PEOPLE").collect().size === 3) + assert(TestSQLContext.read.jdbc( + urlWithUserAndPass, "TEST.PEOPLE", new Properties).collect().length === 3) } test("Partitioning via JDBCPartitioningInfo API") { - assert(TestSQLContext.jdbc(urlWithUserAndPass, "TEST.PEOPLE", "THEID", 0, 4, 3) - .collect.size === 3) + assert( + TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", "THEID", 0, 4, 3, new Properties) + .collect().length === 3) } test("Partitioning via list-of-where-clauses API") { val parts = Array[String]("THEID < 2", "THEID >= 2") - assert(TestSQLContext.jdbc(urlWithUserAndPass, "TEST.PEOPLE", parts).collect().size === 3) + assert(TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", parts, new Properties) + .collect().length === 3) } test("H2 integral types") { val rows = sql("SELECT * FROM inttypes WHERE A IS NOT NULL").collect() - assert(rows.size === 1) + assert(rows.length === 1) assert(rows(0).getInt(0) === 1) assert(rows(0).getBoolean(1) === false) assert(rows(0).getInt(2) === 3) @@ -246,7 +249,7 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { test("H2 null entries") { val rows = sql("SELECT * FROM inttypes WHERE A IS NULL").collect() - assert(rows.size === 1) + assert(rows.length === 1) assert(rows(0).isNullAt(0)) assert(rows(0).isNullAt(1)) assert(rows(0).isNullAt(2)) @@ -286,24 +289,28 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { } test("test DATE types") { - val rows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.TIMETYPES").collect() - val cachedRows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.TIMETYPES").cache().collect() + val rows = TestSQLContext.read.jdbc( + urlWithUserAndPass, "TEST.TIMETYPES", new Properties).collect() + val cachedRows = TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.TIMETYPES", new Properties) + .cache().collect() 
assert(rows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) assert(rows(1).getAs[java.sql.Date](1) === null) assert(cachedRows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) } test("test DATE types in cache") { - val rows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.TIMETYPES").collect() - TestSQLContext - .jdbc(urlWithUserAndPass, "TEST.TIMETYPES").cache().registerTempTable("mycached_date") + val rows = + TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.TIMETYPES", new Properties).collect() + TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.TIMETYPES", new Properties) + .cache().registerTempTable("mycached_date") val cachedRows = sql("select * from mycached_date").collect() assert(rows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) assert(cachedRows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) } test("test types for null value") { - val rows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.NULLTYPES").collect() + val rows = TestSQLContext.read.jdbc( + urlWithUserAndPass, "TEST.NULLTYPES", new Properties).collect() assert((0 to 14).forall(i => rows(0).isNullAt(i))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index 0800eded443de..2e4c12f9da80c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -22,7 +22,7 @@ import java.util.Properties import org.scalatest.{BeforeAndAfter, FunSuite} -import org.apache.spark.sql.Row +import org.apache.spark.sql.{SaveMode, Row} import org.apache.spark.sql.test._ import org.apache.spark.sql.types._ @@ -90,64 +90,66 @@ class JDBCWriteSuite extends FunSuite with BeforeAndAfter { test("Basic CREATE") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x2), schema2) - df.createJDBCTable(url, "TEST.BASICCREATETEST", false) - assert(2 == TestSQLContext.jdbc(url, "TEST.BASICCREATETEST").count) - assert(2 == TestSQLContext.jdbc(url, "TEST.BASICCREATETEST").collect()(0).length) + df.write.jdbc(url, "TEST.BASICCREATETEST", new Properties) + assert(2 == TestSQLContext.read.jdbc(url, "TEST.BASICCREATETEST", new Properties).count) + assert(2 == + TestSQLContext.read.jdbc(url, "TEST.BASICCREATETEST", new Properties).collect()(0).length) } test("CREATE with overwrite") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x3), schema3) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr1x2), schema2) - df.createJDBCTable(url1, "TEST.DROPTEST", false, properties) - assert(2 == TestSQLContext.jdbc(url1, "TEST.DROPTEST", properties).count) - assert(3 == TestSQLContext.jdbc(url1, "TEST.DROPTEST", properties).collect()(0).length) + df.write.jdbc(url1, "TEST.DROPTEST", properties) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.DROPTEST", properties).count) + assert(3 == TestSQLContext.read.jdbc(url1, "TEST.DROPTEST", properties).collect()(0).length) - df2.createJDBCTable(url1, "TEST.DROPTEST", true, properties) - assert(1 == TestSQLContext.jdbc(url1, "TEST.DROPTEST", properties).count) - assert(2 == TestSQLContext.jdbc(url1, "TEST.DROPTEST", properties).collect()(0).length) + df2.write.mode(SaveMode.Overwrite).jdbc(url1, "TEST.DROPTEST", properties) + assert(1 == TestSQLContext.read.jdbc(url1, "TEST.DROPTEST", properties).count) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.DROPTEST", properties).collect()(0).length) } test("CREATE 
then INSERT to append") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x2), schema2) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr1x2), schema2) - df.createJDBCTable(url, "TEST.APPENDTEST", false) - df2.insertIntoJDBC(url, "TEST.APPENDTEST", false) - assert(3 == TestSQLContext.jdbc(url, "TEST.APPENDTEST").count) - assert(2 == TestSQLContext.jdbc(url, "TEST.APPENDTEST").collect()(0).length) + df.write.jdbc(url, "TEST.APPENDTEST", new Properties) + df2.write.mode(SaveMode.Append).jdbc(url, "TEST.APPENDTEST", new Properties) + assert(3 == TestSQLContext.read.jdbc(url, "TEST.APPENDTEST", new Properties).count) + assert(2 == + TestSQLContext.read.jdbc(url, "TEST.APPENDTEST", new Properties).collect()(0).length) } test("CREATE then INSERT to truncate") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x2), schema2) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr1x2), schema2) - df.createJDBCTable(url1, "TEST.TRUNCATETEST", false, properties) - df2.insertIntoJDBC(url1, "TEST.TRUNCATETEST", true, properties) - assert(1 == TestSQLContext.jdbc(url1, "TEST.TRUNCATETEST", properties).count) - assert(2 == TestSQLContext.jdbc(url1, "TEST.TRUNCATETEST", properties).collect()(0).length) + df.write.jdbc(url1, "TEST.TRUNCATETEST", properties) + df2.write.mode(SaveMode.Overwrite).jdbc(url1, "TEST.TRUNCATETEST", properties) + assert(1 == TestSQLContext.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.TRUNCATETEST", properties).collect()(0).length) } test("Incompatible INSERT to append") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x2), schema2) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr2x3), schema3) - df.createJDBCTable(url, "TEST.INCOMPATIBLETEST", false) + df.write.jdbc(url, "TEST.INCOMPATIBLETEST", new Properties) intercept[org.apache.spark.SparkException] { - df2.insertIntoJDBC(url, "TEST.INCOMPATIBLETEST", true) + df2.write.mode(SaveMode.Append).jdbc(url, "TEST.INCOMPATIBLETEST", new Properties) } } - + test("INSERT to JDBC Datasource") { TestSQLContext.sql("INSERT INTO TABLE PEOPLE1 SELECT * FROM PEOPLE") - assert(2 == TestSQLContext.jdbc(url1, "TEST.PEOPLE1", properties).count) - assert(2 == TestSQLContext.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.PEOPLE1", properties).count) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length) } - + test("INSERT to JDBC Datasource with overwrite") { TestSQLContext.sql("INSERT INTO TABLE PEOPLE1 SELECT * FROM PEOPLE") TestSQLContext.sql("INSERT OVERWRITE TABLE PEOPLE1 SELECT * FROM PEOPLE") - assert(2 == TestSQLContext.jdbc(url1, "TEST.PEOPLE1", properties).count) - assert(2 == TestSQLContext.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.PEOPLE1", properties).count) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length) } } diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java index 53ddecf57958b..58fe96adab17e 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java @@ -81,7 +81,7 @@ public void setUp() throws IOException { jsonObjects.add("{\"a\":" + i + ", 
\"b\":\"str" + i + "\"}"); } JavaRDD<String> rdd = sc.parallelize(jsonObjects); - df = sqlContext.jsonRDD(rdd); + df = sqlContext.read().json(rdd); df.registerTempTable("jsonTable"); } @@ -96,7 +96,11 @@ public void tearDown() throws IOException { public void saveExternalTableAndQueryIt() { Map<String, String> options = new HashMap<String, String>(); options.put("path", path.toString()); - df.saveAsTable("javaSavedTable", "org.apache.spark.sql.json", SaveMode.Append, options); + df.write() + .format("org.apache.spark.sql.json") + .mode(SaveMode.Append) + .options(options) + .saveAsTable("javaSavedTable"); checkAnswer( sqlContext.sql("SELECT * FROM javaSavedTable"), @@ -115,7 +119,11 @@ public void saveExternalTableAndQueryIt() { public void saveExternalTableWithSchemaAndQueryIt() { Map<String, String> options = new HashMap<String, String>(); options.put("path", path.toString()); - df.saveAsTable("javaSavedTable", "org.apache.spark.sql.json", SaveMode.Append, options); + df.write() + .format("org.apache.spark.sql.json") + .mode(SaveMode.Append) + .options(options) + .saveAsTable("javaSavedTable"); checkAnswer( sqlContext.sql("SELECT * FROM javaSavedTable"), @@ -138,7 +146,11 @@ public void saveExternalTableWithSchemaAndQueryIt() { @Test public void saveTableAndQueryIt() { Map<String, String> options = new HashMap<String, String>(); - df.saveAsTable("javaSavedTable", "org.apache.spark.sql.json", SaveMode.Append, options); + df.write() + .format("org.apache.spark.sql.json") + .mode(SaveMode.Append) + .options(options) + .saveAsTable("javaSavedTable"); checkAnswer( sqlContext.sql("SELECT * FROM javaSavedTable"), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index fc6c3c35037b0..945596db80326 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -162,7 +162,7 @@ class CachedTableSuite extends QueryTest { test("REFRESH TABLE also needs to recache the data (data source tables)") { val tempPath: File = Utils.createTempDir() tempPath.delete() - table("src").save(tempPath.toString, "parquet", SaveMode.Overwrite) + table("src").write.mode(SaveMode.Overwrite).parquet(tempPath.toString) sql("DROP TABLE IF EXISTS refreshTable") createExternalTable("refreshTable", tempPath.toString, "parquet") checkAnswer( @@ -172,7 +172,7 @@ class CachedTableSuite extends QueryTest { sql("CACHE TABLE refreshTable") assertCached(table("refreshTable")) // Append new data. - table("src").save(tempPath.toString, "parquet", SaveMode.Append) + table("src").write.mode(SaveMode.Append).parquet(tempPath.toString) // We are still using the old data. 
assertCached(table("refreshTable")) checkAnswer( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 58b0b80c31e2e..30db976a3ae74 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -409,11 +409,11 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { val originalDefaultSource = conf.defaultDataSourceName val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - val df = jsonRDD(rdd) + val df = read.json(rdd) conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") // Save the df as a managed table (by not specifiying the path). - df.saveAsTable("savedJsonTable") + df.write.saveAsTable("savedJsonTable") checkAnswer( sql("SELECT * FROM savedJsonTable where savedJsonTable.a < 5"), @@ -443,11 +443,11 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { val originalDefaultSource = conf.defaultDataSourceName val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - val df = jsonRDD(rdd) + val df = read.json(rdd) conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") // Save the df as a managed table (by not specifiying the path). - df.saveAsTable("savedJsonTable") + df.write.saveAsTable("savedJsonTable") checkAnswer( sql("SELECT * FROM savedJsonTable"), @@ -455,17 +455,17 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { // Right now, we cannot append to an existing JSON table. intercept[RuntimeException] { - df.saveAsTable("savedJsonTable", SaveMode.Append) + df.write.mode(SaveMode.Append).saveAsTable("savedJsonTable") } // We can overwrite it. - df.saveAsTable("savedJsonTable", SaveMode.Overwrite) + df.write.mode(SaveMode.Overwrite).saveAsTable("savedJsonTable") checkAnswer( sql("SELECT * FROM savedJsonTable"), df.collect()) // When the save mode is Ignore, we will do nothing when the table already exists. - df.select("b").saveAsTable("savedJsonTable", SaveMode.Ignore) + df.select("b").write.mode(SaveMode.Ignore).saveAsTable("savedJsonTable") assert(df.schema === table("savedJsonTable").schema) checkAnswer( sql("SELECT * FROM savedJsonTable"), @@ -479,11 +479,11 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { // Create an external table by specifying the path. 
conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - df.saveAsTable( - "savedJsonTable", - "org.apache.spark.sql.json", - SaveMode.Append, - Map("path" -> tempPath.toString)) + df.write + .format("org.apache.spark.sql.json") + .mode(SaveMode.Append) + .option("path", tempPath.toString) + .saveAsTable("savedJsonTable") checkAnswer( sql("SELECT * FROM savedJsonTable"), df.collect()) @@ -501,14 +501,13 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { val originalDefaultSource = conf.defaultDataSourceName val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - val df = jsonRDD(rdd) + val df = read.json(rdd) conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - df.saveAsTable( - "savedJsonTable", - "org.apache.spark.sql.json", - SaveMode.Append, - Map("path" -> tempPath.toString)) + df.write.format("org.apache.spark.sql.json") + .mode(SaveMode.Append) + .option("path", tempPath.toString) + .saveAsTable("savedJsonTable") conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") createExternalTable("createdJsonTable", tempPath.toString) @@ -566,7 +565,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "true") val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - jsonRDD(rdd).registerTempTable("jt") + read.json(rdd).registerTempTable("jt") sql( """ |create table test_parquet_ctas STORED AS parquET @@ -601,7 +600,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { StructType( StructField("a", ArrayType(IntegerType, containsNull = true), nullable = true) :: Nil) assert(df1.schema === expectedSchema1) - df1.saveAsTable("arrayInParquet", "parquet", SaveMode.Overwrite) + df1.write.mode(SaveMode.Overwrite).format("parquet").saveAsTable("arrayInParquet") val df2 = createDataFrame(Tuple1(Seq(2, 3)) :: Nil).toDF("a") @@ -610,10 +609,10 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { StructField("a", ArrayType(IntegerType, containsNull = false), nullable = true) :: Nil) assert(df2.schema === expectedSchema2) df2.insertInto("arrayInParquet", overwrite = false) - createDataFrame(Tuple1(Seq(4, 5)) :: Nil).toDF("a") - .saveAsTable("arrayInParquet", SaveMode.Append) // This one internally calls df2.insertInto. - createDataFrame(Tuple1(Seq(Int.box(6), null.asInstanceOf[Integer])) :: Nil).toDF("a") - .saveAsTable("arrayInParquet", "parquet", SaveMode.Append) + createDataFrame(Tuple1(Seq(4, 5)) :: Nil).toDF("a").write.mode(SaveMode.Append) + .saveAsTable("arrayInParquet") // This one internally calls df2.insertInto. 
+ createDataFrame(Tuple1(Seq(Int.box(6), null.asInstanceOf[Integer])) :: Nil).toDF("a").write + .mode(SaveMode.Append).saveAsTable("arrayInParquet") refreshTable("arrayInParquet") checkAnswer( @@ -634,7 +633,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { StructType( StructField("a", mapType1, nullable = true) :: Nil) assert(df1.schema === expectedSchema1) - df1.saveAsTable("mapInParquet", "parquet", SaveMode.Overwrite) + df1.write.mode(SaveMode.Overwrite).format("parquet").saveAsTable("mapInParquet") val df2 = createDataFrame(Tuple1(Map(2 -> 3)) :: Nil).toDF("a") @@ -644,10 +643,10 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { StructField("a", mapType2, nullable = true) :: Nil) assert(df2.schema === expectedSchema2) df2.insertInto("mapInParquet", overwrite = false) - createDataFrame(Tuple1(Map(4 -> 5)) :: Nil).toDF("a") - .saveAsTable("mapInParquet", SaveMode.Append) // This one internally calls df2.insertInto. - createDataFrame(Tuple1(Map(6 -> null.asInstanceOf[Integer])) :: Nil).toDF("a") - .saveAsTable("mapInParquet", "parquet", SaveMode.Append) + createDataFrame(Tuple1(Map(4 -> 5)) :: Nil).toDF("a").write.mode(SaveMode.Append) + .saveAsTable("mapInParquet") // This one internally calls df2.insertInto. + createDataFrame(Tuple1(Map(6 -> null.asInstanceOf[Integer])) :: Nil).toDF("a").write + .format("parquet").mode(SaveMode.Append).saveAsTable("mapInParquet") refreshTable("mapInParquet") checkAnswer( @@ -711,30 +710,30 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { def createDF(from: Int, to: Int): DataFrame = createDataFrame((from to to).map(i => Tuple2(i, s"str$i"))).toDF("c1", "c2") - createDF(0, 9).saveAsTable("insertParquet", "parquet") + createDF(0, 9).write.format("parquet").saveAsTable("insertParquet") checkAnswer( sql("SELECT p.c1, p.c2 FROM insertParquet p WHERE p.c1 > 5"), (6 to 9).map(i => Row(i, s"str$i"))) intercept[AnalysisException] { - createDF(10, 19).saveAsTable("insertParquet", "parquet") + createDF(10, 19).write.format("parquet").saveAsTable("insertParquet") } - createDF(10, 19).saveAsTable("insertParquet", "parquet", SaveMode.Append) + createDF(10, 19).write.mode(SaveMode.Append).format("parquet").saveAsTable("insertParquet") checkAnswer( sql("SELECT p.c1, p.c2 FROM insertParquet p WHERE p.c1 > 5"), (6 to 19).map(i => Row(i, s"str$i"))) - createDF(20, 29).saveAsTable("insertParquet", "parquet", SaveMode.Append) + createDF(20, 29).write.mode(SaveMode.Append).format("parquet").saveAsTable("insertParquet") checkAnswer( sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 25"), (6 to 24).map(i => Row(i, s"str$i"))) intercept[AnalysisException] { - createDF(30, 39).saveAsTable("insertParquet") + createDF(30, 39).write.saveAsTable("insertParquet") } - createDF(30, 39).saveAsTable("insertParquet", SaveMode.Append) + createDF(30, 39).write.mode(SaveMode.Append).saveAsTable("insertParquet") checkAnswer( sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 35"), (6 to 34).map(i => Row(i, s"str$i"))) @@ -744,11 +743,11 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 45"), (6 to 44).map(i => Row(i, s"str$i"))) - createDF(50, 59).saveAsTable("insertParquet", SaveMode.Overwrite) + createDF(50, 59).write.mode(SaveMode.Overwrite).saveAsTable("insertParquet") checkAnswer( sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 51 AND p.c1 < 55"), (52 to 
54).map(i => Row(i, s"str$i"))) - createDF(60, 69).saveAsTable("insertParquet", SaveMode.Ignore) + createDF(60, 69).write.mode(SaveMode.Ignore).saveAsTable("insertParquet") checkAnswer( sql("SELECT p.c1, c2 FROM insertParquet p"), (50 to 59).map(i => Row(i, s"str$i"))) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala index 8ad3627504229..3dfa6e72e1242 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.hive.test.TestHive.{sparkContext, jsonRDD, sql} +import org.apache.spark.sql.hive.test.TestHive.{read, sparkContext, jsonRDD, sql} import org.apache.spark.sql.hive.test.TestHive.implicits._ case class Nested(a: Int, B: Int) @@ -31,14 +31,14 @@ case class Data(a: Int, B: Int, n: Nested, nestedArray: Seq[Nested]) class HiveResolutionSuite extends HiveComparisonTest { test("SPARK-3698: case insensitive test for nested data") { - jsonRDD(sparkContext.makeRDD( + read.json(sparkContext.makeRDD( """{"a": [{"a": {"a": 1}}]}""" :: Nil)).registerTempTable("nested") // This should be successfully analyzed sql("SELECT a[0].A.A from nested").queryExecution.analyzed } test("SPARK-5278: check ambiguous reference to fields") { - jsonRDD(sparkContext.makeRDD( + read.json(sparkContext.makeRDD( """{"a": [{"b": 1, "B": 2}]}""" :: Nil)).registerTempTable("nested") // there are 2 filed matching field name "b", we should report Ambiguous reference error diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index dfe73c62c42b9..ca2c4b4019c55 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -535,14 +535,14 @@ class SQLQuerySuite extends QueryTest { test("SPARK-4296 Grouping field with Hive UDF as sub expression") { val rdd = sparkContext.makeRDD( """{"a": "str", "b":"1", "c":"1970-01-01 00:00:00"}""" :: Nil) - jsonRDD(rdd).registerTempTable("data") + read.json(rdd).registerTempTable("data") checkAnswer( sql("SELECT concat(a, '-', b), year(c) FROM data GROUP BY concat(a, '-', b), year(c)"), Row("str-1", 1970)) dropTempTable("data") - jsonRDD(rdd).registerTempTable("data") + read.json(rdd).registerTempTable("data") checkAnswer(sql("SELECT year(c) + 1 FROM data GROUP BY year(c) + 1"), Row(1971)) dropTempTable("data") @@ -550,7 +550,7 @@ class SQLQuerySuite extends QueryTest { test("resolve udtf with single alias") { val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}""")) - jsonRDD(rdd).registerTempTable("data") + read.json(rdd).registerTempTable("data") val df = sql("SELECT explode(a) AS val FROM data") val col = df("val") } @@ -563,7 +563,7 @@ class SQLQuerySuite extends QueryTest { // PreInsertionCasts will actually start to work before ImplicitGenerate and then // generates an invalid query plan. 
val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}""")) - jsonRDD(rdd).registerTempTable("data") + read.json(rdd).registerTempTable("data") val originalConf = getConf("spark.sql.hive.convertCTAS", "false") setConf("spark.sql.hive.convertCTAS", "false") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index a0075f1e44ca8..05d99983b6a63 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -150,9 +150,9 @@ class ParquetMetastoreSuiteBase extends ParquetPartitioningTest { } val rdd1 = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str$i"}""")) - jsonRDD(rdd1).registerTempTable("jt") + read.json(rdd1).registerTempTable("jt") val rdd2 = sparkContext.parallelize((1 to 10).map(i => s"""{"a":[$i, null]}""")) - jsonRDD(rdd2).registerTempTable("jt_array") + read.json(rdd2).registerTempTable("jt_array") setConf("spark.sql.hive.convertMetastoreParquet", "true") } @@ -617,16 +617,16 @@ class ParquetSourceSuiteBase extends ParquetPartitioningTest { sql("drop table if exists spark_6016_fix") // Create a DataFrame with two partitions. So, the created table will have two parquet files. - val df1 = jsonRDD(sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i}"""), 2)) - df1.saveAsTable("spark_6016_fix", "parquet", SaveMode.Overwrite) + val df1 = read.json(sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i}"""), 2)) + df1.write.mode(SaveMode.Overwrite).format("parquet").saveAsTable("spark_6016_fix") checkAnswer( sql("select * from spark_6016_fix"), (1 to 10).map(i => Row(i)) ) // Create a DataFrame with four partitions. So, the created table will have four parquet files. - val df2 = jsonRDD(sparkContext.parallelize((1 to 10).map(i => s"""{"b":$i}"""), 4)) - df2.saveAsTable("spark_6016_fix", "parquet", SaveMode.Overwrite) + val df2 = read.json(sparkContext.parallelize((1 to 10).map(i => s"""{"b":$i}"""), 4)) + df2.write.mode(SaveMode.Overwrite).format("parquet").saveAsTable("spark_6016_fix") // For the bug of SPARK-6016, we are caching two outdated footers for df1. 
Then, // since the new table has four parquet files, we are trying to read new footers from two files // and then merge metadata in footers of these four (two outdated ones and two latest one), @@ -663,7 +663,7 @@ class ParquetDataSourceOnSourceSuite extends ParquetSourceSuiteBase { StructField("a", arrayType1, nullable = true) :: Nil) assert(df.schema === expectedSchema1) - df.saveAsTable("alwaysNullable", "parquet") + df.write.format("parquet").saveAsTable("alwaysNullable") val mapType2 = MapType(IntegerType, IntegerType, valueContainsNull = true) val arrayType2 = ArrayType(IntegerType, containsNull = true) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala index f44b3c521e647..9d9b436cabe3c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala @@ -120,10 +120,7 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { test("save()/load() - non-partitioned table - ErrorIfExists") { withTempDir { file => intercept[RuntimeException] { - testDF.save( - path = file.getCanonicalPath, - source = dataSourceName, - mode = SaveMode.ErrorIfExists) + testDF.write.format(dataSourceName).mode(SaveMode.ErrorIfExists).save(file.getCanonicalPath) } } } @@ -233,10 +230,8 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { test("save()/load() - partitioned table - Ignore") { withTempDir { file => - partitionedTestDF.save( - path = file.getCanonicalPath, - source = dataSourceName, - mode = SaveMode.Ignore) + partitionedTestDF.write + .format(dataSourceName).mode(SaveMode.Ignore).save(file.getCanonicalPath) val path = new Path(file.getCanonicalPath) val fs = path.getFileSystem(SparkHadoopUtil.get.conf) @@ -249,11 +244,9 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { } test("saveAsTable()/load() - non-partitioned table - Overwrite") { - testDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Overwrite, - Map("dataSchema" -> dataSchema.json)) + testDF.write.format(dataSourceName).mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .saveAsTable("t") withTable("t") { checkAnswer(table("t"), testDF.collect()) @@ -261,15 +254,8 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { } test("saveAsTable()/load() - non-partitioned table - Append") { - testDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Overwrite) - - testDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Append) + testDF.write.format(dataSourceName).mode(SaveMode.Overwrite).saveAsTable("t") + testDF.write.format(dataSourceName).mode(SaveMode.Append).saveAsTable("t") withTable("t") { checkAnswer(table("t"), testDF.unionAll(testDF).orderBy("a").collect()) @@ -281,10 +267,7 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { withTempTable("t") { intercept[AnalysisException] { - testDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.ErrorIfExists) + testDF.write.format(dataSourceName).mode(SaveMode.ErrorIfExists).saveAsTable("t") } } } @@ -293,21 +276,16 @@ class HadoopFsRelationTest extends QueryTest with ParquetTest { Seq.empty[(Int, String)].toDF().registerTempTable("t") withTempTable("t") { - testDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Ignore) - + 
testDF.write.format(dataSourceName).mode(SaveMode.Ignore).saveAsTable("t") assert(table("t").collect().isEmpty) } } test("saveAsTable()/load() - partitioned table - simple queries") { - partitionedTestDF.saveAsTable( - tableName = "t", - source = dataSourceName, - mode = SaveMode.Overwrite, - Map("dataSchema" -> dataSchema.json)) + partitionedTestDF.write.format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .saveAsTable("t") withTable("t") { checkQueries(table("t")) @@ -492,11 +470,9 @@ class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest { StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) checkQueries( - load( - source = dataSourceName, - options = Map( - "path" -> file.getCanonicalPath, - "dataSchema" -> dataSchemaWithPartition.json))) + read.format(dataSourceName) + .option("dataSchema", dataSchemaWithPartition.json) + .load(file.getCanonicalPath)) } } } @@ -518,18 +494,16 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest { sparkContext .parallelize(for (i <- 1 to 3) yield (i, s"val_$i", p1)) .toDF("a", "b", "p1") - .saveAsParquetFile(partitionDir.toString) + .write.parquet(partitionDir.toString) } val dataSchemaWithPartition = StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) checkQueries( - load( - source = dataSourceName, - options = Map( - "path" -> file.getCanonicalPath, - "dataSchema" -> dataSchemaWithPartition.json))) + read.format(dataSourceName) + .option("dataSchema", dataSchemaWithPartition.json) + .load(file.getCanonicalPath)) } } } From ba4f8ca0d9ccc0a39a8a0105541d0cc1f4912d62 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Sat, 16 May 2015 23:20:09 -0700 Subject: [PATCH 215/320] [MINOR] [SQL] Removes an unreachable case clause This case clause is already covered by the one above, and generates a compilation warning. Author: Cheng Lian <lian@databricks.com> Closes #6214 from liancheng/remove-unreachable-code and squashes the following commits: c38ca7c [Cheng Lian] Removes an unreachable case clause --- sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala index 1eacdde7413f1..ab33125b74c17 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala @@ -101,7 +101,6 @@ private[sql] case class PreWriteCheck(catalog: Catalog) extends (LogicalPlan => } } - case logical.InsertIntoTable(LogicalRelation(_: InsertableRelation), _, _, _, _) => // OK case logical.InsertIntoTable(LogicalRelation(_: HadoopFsRelation), _, _, _, _) => // OK case logical.InsertIntoTable(l: LogicalRelation, _, _, _, _) => // The relation in l is not an InsertableRelation. From 1a7b9ce80bb5649796dda48d6a6d662a2809d0ef Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Date: Sun, 17 May 2015 00:12:20 -0700 Subject: [PATCH 216/320] [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts cc pwendell P.S: I can't believe this was outdated all along ? 
Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Closes #6215 from shivaram/update-ec2-map and squashes the following commits: ae3937a [Shivaram Venkataraman] Add 1.3, 1.3.1 to master branch EC2 scripts --- ec2/spark_ec2.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index ab4a96f232c13..be92d5f45aa77 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -48,7 +48,7 @@ from urllib.request import urlopen, Request from urllib.error import HTTPError -SPARK_EC2_VERSION = "1.2.1" +SPARK_EC2_VERSION = "1.3.1" SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) VALID_SPARK_VERSIONS = set([ @@ -65,6 +65,8 @@ "1.1.1", "1.2.0", "1.2.1", + "1.3.0", + "1.3.1", ]) SPARK_TACHYON_MAP = { @@ -75,6 +77,8 @@ "1.1.1": "0.5.0", "1.2.0": "0.5.0", "1.2.1": "0.5.0", + "1.3.0": "0.5.0", + "1.3.1": "0.5.0", } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION From edf09ea1bd4bf7692e0085ad9c70cb1bfc8d06d8 Mon Sep 17 00:00:00 2001 From: scwf <wangfei1@huawei.com> Date: Sun, 17 May 2015 15:17:11 +0800 Subject: [PATCH 217/320] [SQL] [MINOR] Skip unresolved expression for InConversion Author: scwf <wangfei1@huawei.com> Closes #6145 from scwf/InConversion and squashes the following commits: 5c8ac6b [scwf] minir fix for InConversion --- .../apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index fe0d3f29977c3..b45b17d856fac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -296,6 +296,9 @@ trait HiveTypeCoercion { */ object InConversion extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { + // Skip nodes who's children have not been resolved yet. + case e if !e.childrenResolved => e + case i @ In(a, b) if b.exists(_.dataType != a.dataType) => i.makeCopy(Array(a, b.map(Cast(_, a.dataType)))) } From 339905578790fa37fcad9684b859b443313a5aa2 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Sun, 17 May 2015 15:42:21 +0800 Subject: [PATCH 218/320] [SPARK-7447] [SQL] Don't re-merge Parquet schema when the relation is deserialized JIRA: https://issues.apache.org/jira/browse/SPARK-7447 `MetadataCache` in `ParquetRelation2` is annotated as `transient`. When `ParquetRelation2` is deserialized, we ask `MetadataCache` to refresh and perform schema merging again. It is time-consuming especially for very many parquet files. With the new `FSBasedParquetRelation`, although `MetadataCache` is not `transient` now, `MetadataCache.refresh()` still performs schema merging again when the relation is deserialized. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #6012 from viirya/without_remerge_schema and squashes the following commits: 2663957 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into without_remerge_schema 6ac7d93 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into without_remerge_schema b0fc09b [Liang-Chi Hsieh] Don't generate and merge parquetSchema multiple times. 
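For context, the core of this fix is a compute-once guard on the merged schema: the field is kept non-transient and is only populated while it is still null, so a deserialized copy of the relation reuses the schema it was shipped with instead of re-reading and merging Parquet footers. A standalone sketch of that pattern, with hypothetical names (this is not Spark code and is not part of the patch below):

    // Illustrative only: cache an expensive, deterministic computation in a regular
    // (non-transient) field and compute it at most once per instance.
    class CachedSchemaExample extends Serializable {
      private var mergedSchema: String = null        // survives Java serialization

      private def expensiveMerge(): String = {
        // stands in for scanning Parquet footers and merging their schemas
        Seq("a: int", "b: string").mkString(", ")
      }

      def schema: String = {
        if (mergedSchema == null) {                  // skipped on deserialized copies
          mergedSchema = expensiveMerge()
        }
        mergedSchema
      }
    }
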
--- .../apache/spark/sql/parquet/newParquet.scala | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 946062f6ea64e..bcbdb1ebd236a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -340,7 +340,7 @@ private[sql] class ParquetRelation2( // Schema of the actual Parquet files, without partition columns discovered from partition // directory paths. - var dataSchema: StructType = _ + var dataSchema: StructType = null // Schema of the whole table, including partition columns. var schema: StructType = _ @@ -379,19 +379,23 @@ private[sql] class ParquetRelation2( f -> new Footer(f.getPath, parquetMetadata) }.seq.toMap - dataSchema = { - val dataSchema0 = - maybeDataSchema - .orElse(readSchema()) - .orElse(maybeMetastoreSchema) - .getOrElse(sys.error("Failed to get the schema.")) - - // If this Parquet relation is converted from a Hive Metastore table, must reconcile case - // case insensitivity issue and possible schema mismatch (probably caused by schema - // evolution). - maybeMetastoreSchema - .map(ParquetRelation2.mergeMetastoreParquetSchema(_, dataSchema0)) - .getOrElse(dataSchema0) + // If we already get the schema, don't need to re-compute it since the schema merging is + // time-consuming. + if (dataSchema == null) { + dataSchema = { + val dataSchema0 = + maybeDataSchema + .orElse(readSchema()) + .orElse(maybeMetastoreSchema) + .getOrElse(sys.error("Failed to get the schema.")) + + // If this Parquet relation is converted from a Hive Metastore table, must reconcile case + // case insensitivity issue and possible schema mismatch (probably caused by schema + // evolution). + maybeMetastoreSchema + .map(ParquetRelation2.mergeMetastoreParquetSchema(_, dataSchema0)) + .getOrElse(dataSchema0) + } } } From 50217667cc1239ed3b15f4d10907b727ed85d7fa Mon Sep 17 00:00:00 2001 From: Steve Loughran <stevel@hortonworks.com> Date: Sun, 17 May 2015 17:03:11 +0100 Subject: [PATCH 219/320] =?UTF-8?q?[SPARK-7669]=20Builds=20against=20Hadoo?= =?UTF-8?q?p=202.6+=20get=20inconsistent=20curator=20depend=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new profile, `hadoop-2.6`, copying over the hadoop-2.4 properties, updating ZK to 3.4.6 and making the curator version a configurable option. That keeps the curator-recipes JAR in sync with that used in hadoop. There's one more option to consider: making the full curator-client version explicit with its own dependency version. 
This will pin down the version from hadoop and hive imports Author: Steve Loughran <stevel@hortonworks.com> Closes #6191 from steveloughran/stevel/SPARK-7669-hadoop-2.6 and squashes the following commits: e3e281a [Steve Loughran] SPARK-7669 declare the version of curator-client and curator-framework JARs 2901ea9 [Steve Loughran] SPARK-7669 Builds against Hadoop 2.6+ get inconsistent curator dependencies --- pom.xml | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1b45cdb67012a..6768a039d11e0 100644 --- a/pom.xml +++ b/pom.xml @@ -130,6 +130,7 @@ <hbase.artifact>hbase</hbase.artifact> <flume.version>1.4.0</flume.version> <zookeeper.version>3.4.5</zookeeper.version> + <curator.version>2.4.0</curator.version> <hive.group>org.spark-project.hive</hive.group> <!-- Version used in Maven Hive dependency --> <hive.version>0.13.1a</hive.version> @@ -707,7 +708,7 @@ <dependency> <groupId>org.apache.curator</groupId> <artifactId>curator-recipes</artifactId> - <version>2.4.0</version> + <version>${curator.version}</version> <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> @@ -716,6 +717,16 @@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>org.apache.curator</groupId> + <artifactId>curator-client</artifactId> + <version>${curator.version}</version> + </dependency> + <dependency> + <groupId>org.apache.curator</groupId> + <artifactId>curator-framework</artifactId> + <version>${curator.version}</version> + </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> @@ -1679,6 +1690,17 @@ </properties> </profile> + <profile> + <id>hadoop-2.6</id> + <properties> + <hadoop.version>2.6.0</hadoop.version> + <jets3t.version>0.9.3</jets3t.version> + <commons.math3.version>3.1.1</commons.math3.version> + <zookeeper.version>3.4.6</zookeeper.version> + <curator.version>2.6.0</curator.version> + </properties> + </profile> + <profile> <id>yarn</id> <modules> @@ -1709,7 +1731,7 @@ <dependency> <groupId>org.apache.curator</groupId> <artifactId>curator-recipes</artifactId> - <version>2.4.0</version> + <version>${curator.version}</version> <exclusions> <exclusion> <groupId>org.apache.zookeeper</groupId> From f2cc6b5bccc3a70fd7d69183b1a068800831fe19 Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Sun, 17 May 2015 09:30:49 -0700 Subject: [PATCH 220/320] [SPARK-7660] Wrap SnappyOutputStream to work around snappy-java bug This patch wraps `SnappyOutputStream` to ensure that `close()` is idempotent and to guard against write-after-`close()` bugs. This is a workaround for https://github.com/xerial/snappy-java/issues/107, a bug where a non-idempotent `close()` method can lead to stream corruption. We can remove this workaround if we upgrade to a snappy-java version that contains my fix for this bug, but in the meantime this patch offers a backportable Spark fix. 
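The essence of the workaround is a thin OutputStream wrapper that makes close() idempotent and fails fast on writes after close. A generic, simplified sketch of that guard (hypothetical class name; the actual change below wraps SnappyOutputStream and also overrides the array-based write methods):

    import java.io.{IOException, OutputStream}

    // Illustrative only: guard an underlying stream against double-close and
    // write-after-close, the behaviors that expose the snappy-java bug.
    class CloseOnceOutputStream(out: OutputStream) extends OutputStream {
      private var closed = false
      private def ensureOpen(): Unit = {
        if (closed) throw new IOException("Stream is closed")
      }

      override def write(b: Int): Unit = { ensureOpen(); out.write(b) }
      override def flush(): Unit = { ensureOpen(); out.flush() }
      override def close(): Unit = {
        if (!closed) {
          closed = true
          out.close()
        }
      }
    }
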
Author: Josh Rosen <joshrosen@databricks.com> Closes #6176 from JoshRosen/SPARK-7660-wrap-snappy and squashes the following commits: 8b77aae [Josh Rosen] Wrap SnappyOutputStream to fix SPARK-7660 --- .../apache/spark/io/CompressionCodec.scala | 49 ++++++++++++++++++- .../unsafe/UnsafeShuffleWriterSuite.java | 8 --- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 0756cdb2ed8e6..0d8ac1f80a9f4 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -17,7 +17,7 @@ package org.apache.spark.io -import java.io.{InputStream, OutputStream} +import java.io.{IOException, InputStream, OutputStream} import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} import net.jpountz.lz4.{LZ4BlockInputStream, LZ4BlockOutputStream} @@ -154,8 +154,53 @@ class SnappyCompressionCodec(conf: SparkConf) extends CompressionCodec { override def compressedOutputStream(s: OutputStream): OutputStream = { val blockSize = conf.getSizeAsBytes("spark.io.compression.snappy.blockSize", "32k").toInt - new SnappyOutputStream(s, blockSize) + new SnappyOutputStreamWrapper(new SnappyOutputStream(s, blockSize)) } override def compressedInputStream(s: InputStream): InputStream = new SnappyInputStream(s) } + +/** + * Wrapper over [[SnappyOutputStream]] which guards against write-after-close and double-close + * issues. See SPARK-7660 for more details. This wrapping can be removed if we upgrade to a version + * of snappy-java that contains the fix for https://github.com/xerial/snappy-java/issues/107. + */ +private final class SnappyOutputStreamWrapper(os: SnappyOutputStream) extends OutputStream { + + private[this] var closed: Boolean = false + + override def write(b: Int): Unit = { + if (closed) { + throw new IOException("Stream is closed") + } + os.write(b) + } + + override def write(b: Array[Byte]): Unit = { + if (closed) { + throw new IOException("Stream is closed") + } + os.write(b) + } + + override def write(b: Array[Byte], off: Int, len: Int): Unit = { + if (closed) { + throw new IOException("Stream is closed") + } + os.write(b, off, len) + } + + override def flush(): Unit = { + if (closed) { + throw new IOException("Stream is closed") + } + os.flush() + } + + override def close(): Unit = { + if (!closed) { + closed = true + os.close() + } + } +} diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java index 78e52643531e0..730d265c87f88 100644 --- a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java @@ -35,7 +35,6 @@ import org.mockito.MockitoAnnotations; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; -import org.xerial.snappy.buffer.CachedBufferAllocator; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.lessThan; @@ -97,13 +96,6 @@ public OutputStream apply(OutputStream stream) { @After public void tearDown() { Utils.deleteRecursively(tempDir); - // This call is a workaround for SPARK-7660, a snappy-java bug which is exposed by this test - // suite. 
Clearing the cached buffer allocator's pool of reusable buffers masks this bug, - // preventing a test failure in JavaAPISuite that would otherwise occur. The underlying bug - // needs to be fixed, but in the meantime this workaround avoids spurious Jenkins failures. - synchronized (CachedBufferAllocator.class) { - CachedBufferAllocator.queueTable.clear(); - } final long leakedMemory = taskMemoryManager.cleanUpAllAllocatedMemory(); if (leakedMemory != 0) { fail("Test leaked " + leakedMemory + " bytes of managed memory"); From 564562874f589c4c8bcabcd9d6eb9a6b0eada938 Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Sun, 17 May 2015 11:59:28 -0700 Subject: [PATCH 221/320] [SPARK-7686] [SQL] DescribeCommand is assigned wrong output attributes in SparkStrategies In `SparkStrategies`, `RunnableDescribeCommand` is called with the output attributes of the table being described rather than the attributes for the `describe` command's output. I discovered this issue because it caused type conversion errors in some UnsafeRow conversion code that I'm writing. Author: Josh Rosen <joshrosen@databricks.com> Closes #6217 from JoshRosen/SPARK-7686 and squashes the following commits: 953a344 [Josh Rosen] Fix SPARK-7686 with a simple change in SparkStrategies. a4eec9f [Josh Rosen] Add failing regression test for SPARK-7686 --- .../org/apache/spark/sql/execution/SparkStrategies.scala | 4 ++-- .../scala/org/apache/spark/sql/sources/DDLTestSuite.scala | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index af0029cb84f9a..3f6a0345bc17d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -354,10 +354,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case c: CreateTableUsingAsSelect if !c.temporary => sys.error("Tables created with SQLContext must be TEMPORARY. 
Use a HiveContext instead.") - case LogicalDescribeCommand(table, isExtended) => + case describe @ LogicalDescribeCommand(table, isExtended) => val resultPlan = self.sqlContext.executePlan(table).executedPlan ExecutedCommand( - RunnableDescribeCommand(resultPlan, resultPlan.output, isExtended)) :: Nil + RunnableDescribeCommand(resultPlan, describe.output, isExtended)) :: Nil case _ => Nil } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala index 6664e8d64c13a..f5106f67a08df 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala @@ -99,4 +99,10 @@ class DDLTestSuite extends DataSourceTest { Row("arrayType", "array<string>", ""), Row("structType", "struct<f1:string,f2:int>", "") )) + + test("SPARK-7686 DescribeCommand should have correct physical plan output attributes") { + val attributes = sql("describe ddlPeople").queryExecution.executedPlan.output + assert(attributes.map(_.name) === Seq("col_name", "data_type", "comment")) + assert(attributes.map(_.dataType).toSet === Set(StringType)) + } } From 2ca60ace8f42cf0bd4569d86c86c37a8a2b6a37c Mon Sep 17 00:00:00 2001 From: Michael Armbrust <michael@databricks.com> Date: Sun, 17 May 2015 12:43:15 -0700 Subject: [PATCH 222/320] [SPARK-7491] [SQL] Allow configuration of classloader isolation for hive Author: Michael Armbrust <michael@databricks.com> Closes #6167 from marmbrus/configureIsolation and squashes the following commits: 6147cbe [Michael Armbrust] filter other conf 22cc3bc7 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into configureIsolation 07476ee [Michael Armbrust] filter empty prefixes dfdf19c [Michael Armbrust] [SPARK-6906][SQL] Allow configuration of classloader isolation for hive --- .../apache/spark/sql/hive/HiveContext.scala | 33 +++++++++++++++++-- .../hive/client/IsolatedClientLoader.scala | 14 ++++---- .../apache/spark/sql/hive/test/TestHive.scala | 9 ++++- 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 9d98c36e947a1..2733ebdb95bca 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -122,6 +122,29 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { protected[hive] def hiveMetastoreJars: String = getConf(HIVE_METASTORE_JARS, "builtin") + /** + * A comma separated list of class prefixes that should be loaded using the classloader that + * is shared between Spark SQL and a specific version of Hive. An example of classes that should + * be shared is JDBC drivers that are needed to talk to the metastore. Other classes that need + * to be shared are those that interact with classes that are already shared. For example, + * custom appenders that are used by log4j. + */ + protected[hive] def hiveMetastoreSharedPrefixes: Seq[String] = + getConf("spark.sql.hive.metastore.sharedPrefixes", jdbcPrefixes) + .split(",").filterNot(_ == "") + + private def jdbcPrefixes = Seq( + "com.mysql.jdbc", "org.postgresql", "com.microsoft.sqlserver", "oracle.jdbc").mkString(",") + + /** + * A comma separated list of class prefixes that should explicitly be reloaded for each version + * of Hive that Spark SQL is communicating with. 
For example, Hive UDFs that are declared in a + * prefix that typically would be shared (i.e. org.apache.spark.*) + */ + protected[hive] def hiveMetastoreBarrierPrefixes: Seq[String] = + getConf("spark.sql.hive.metastore.barrierPrefixes", "") + .split(",").filterNot(_ == "") + @transient protected[sql] lazy val substitutor = new VariableSubstitution() @@ -179,12 +202,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { version = metaVersion, execJars = jars.toSeq, config = allConfig, - isolationOn = true) + isolationOn = true, + barrierPrefixes = hiveMetastoreBarrierPrefixes, + sharedPrefixes = hiveMetastoreSharedPrefixes) } else if (hiveMetastoreJars == "maven") { // TODO: Support for loading the jars from an already downloaded location. logInfo( s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using maven.") - IsolatedClientLoader.forVersion(hiveMetastoreVersion, allConfig ) + IsolatedClientLoader.forVersion(hiveMetastoreVersion, allConfig) } else { // Convert to files and expand any directories. val jars = @@ -210,7 +235,9 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { version = metaVersion, execJars = jars.toSeq, config = allConfig, - isolationOn = true) + isolationOn = true, + barrierPrefixes = hiveMetastoreBarrierPrefixes, + sharedPrefixes = hiveMetastoreSharedPrefixes) } isolatedLoader.client } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 7f94c93ba49c1..196a3d836cab2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -56,8 +56,7 @@ private[hive] object IsolatedClientLoader { (if (version.hasBuiltinsJar) "hive-builtins" :: Nil else Nil)) .map(a => s"org.apache.hive:$a:${version.fullVersion}") :+ "com.google.guava:guava:14.0.1" :+ - "org.apache.hadoop:hadoop-client:2.4.0" :+ - "mysql:mysql-connector-java:5.1.12" + "org.apache.hadoop:hadoop-client:2.4.0" val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( @@ -106,7 +105,9 @@ private[hive] class IsolatedClientLoader( val config: Map[String, String] = Map.empty, val isolationOn: Boolean = true, val rootClassLoader: ClassLoader = ClassLoader.getSystemClassLoader.getParent.getParent, - val baseClassLoader: ClassLoader = Thread.currentThread().getContextClassLoader) + val baseClassLoader: ClassLoader = Thread.currentThread().getContextClassLoader, + val sharedPrefixes: Seq[String] = Seq.empty, + val barrierPrefixes: Seq[String] = Seq.empty) extends Logging { // Check to make sure that the root classloader does not know about Hive. @@ -122,13 +123,14 @@ private[hive] class IsolatedClientLoader( name.startsWith("scala.") || name.startsWith("com.google") || name.startsWith("java.lang.") || - name.startsWith("java.net") + name.startsWith("java.net") || + sharedPrefixes.exists(name.startsWith) /** True if `name` refers to a spark class that must see specific version of Hive. 
*/ protected def isBarrierClass(name: String): Boolean = - name.startsWith("org.apache.spark.sql.hive.execution.PairSerDe") || name.startsWith(classOf[ClientWrapper].getName) || - name.startsWith(classOf[ReflectionMagic].getName) + name.startsWith(classOf[ReflectionMagic].getName) || + barrierPrefixes.exists(name.startsWith) protected def classToPath(name: String): String = name.replaceAll("\\.", "/") + ".class" diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 1598d4bd47550..964828407481e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -48,7 +48,14 @@ import scala.collection.JavaConversions._ // SPARK-3729: Test key required to check for initialization errors with config. object TestHive extends TestHiveContext( - new SparkContext("local[2]", "TestSQLContext", new SparkConf().set("spark.sql.test", ""))) + new SparkContext( + "local[2]", + "TestSQLContext", + new SparkConf() + .set("spark.sql.test", "") + .set( + "spark.sql.hive.metastore.barrierPrefixes", + "org.apache.spark.sql.hive.execution.PairSerDe"))) /** * A locally running test instance of Spark's Hive execution engine. From ca4257aec658aaa87f4f097dd7534033d5f13ddc Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Sun, 17 May 2015 16:49:07 -0700 Subject: [PATCH 223/320] [SPARK-6514] [SPARK-5960] [SPARK-6656] [SPARK-7679] [STREAMING] [KINESIS] Updates to the Kinesis API SPARK-6514 - Use correct region SPARK-5960 - Allow AWS Credentials to be directly passed SPARK-6656 - Specify kinesis application name explicitly SPARK-7679 - Upgrade to latest KCL and AWS SDK. Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6147 from tdas/kinesis-api-update and squashes the following commits: f23ea77 [Tathagata Das] Updated versions and updated APIs 373b201 [Tathagata Das] Updated Kinesis API --- .../kinesis/KinesisCheckpointState.scala | 2 +- .../streaming/kinesis/KinesisReceiver.scala | 152 +++++----- .../kinesis/KinesisRecordProcessor.scala | 32 ++- .../streaming/kinesis/KinesisUtils.scala | 263 +++++++++++++++--- .../kinesis/KinesisReceiverSuite.scala | 15 +- pom.xml | 4 +- 6 files changed, 348 insertions(+), 120 deletions(-) diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala index 588e86a1887ec..1c9b0c218ae18 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala @@ -48,7 +48,7 @@ private[kinesis] class KinesisCheckpointState( /** * Advance the checkpoint clock by the checkpoint interval. 
*/ - def advanceCheckpoint() = { + def advanceCheckpoint(): Unit = { checkpointClock.advance(checkpointInterval.milliseconds) } } diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala index a7fe4476cacb8..01608fbd3fd31 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala @@ -16,32 +16,31 @@ */ package org.apache.spark.streaming.kinesis -import java.net.InetAddress import java.util.UUID +import com.amazonaws.auth.{AWSCredentials, AWSCredentialsProvider, BasicAWSCredentials, DefaultAWSCredentialsProviderChain} +import com.amazonaws.services.kinesis.clientlibrary.interfaces.{IRecordProcessor, IRecordProcessorFactory} +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.{InitialPositionInStream, KinesisClientLibConfiguration, Worker} + import org.apache.spark.Logging import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.Duration import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.util.Utils -import com.amazonaws.auth.AWSCredentialsProvider -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain -import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor -import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker + +private[kinesis] +case class SerializableAWSCredentials(accessKeyId: String, secretKey: String) + extends BasicAWSCredentials(accessKeyId, secretKey) with Serializable /** * Custom AWS Kinesis-specific implementation of Spark Streaming's Receiver. * This implementation relies on the Kinesis Client Library (KCL) Worker as described here: * https://github.com/awslabs/amazon-kinesis-client - * This is a custom receiver used with StreamingContext.receiverStream(Receiver) - * as described here: - * http://spark.apache.org/docs/latest/streaming-custom-receivers.html - * Instances of this class will get shipped to the Spark Streaming Workers - * to run within a Spark Executor. + * This is a custom receiver used with StreamingContext.receiverStream(Receiver) as described here: + * http://spark.apache.org/docs/latest/streaming-custom-receivers.html + * Instances of this class will get shipped to the Spark Streaming Workers to run within a + * Spark Executor. * * @param appName Kinesis application name. Kinesis Apps are mapped to Kinesis Streams * by the Kinesis Client Library. If you change the App name or Stream name, @@ -49,6 +48,8 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker * DynamoDB table with the same name this Kinesis application. * @param streamName Kinesis stream name * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Region name used by the Kinesis Client Library for + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. * See the Kinesis Spark Streaming documentation for more * details on the different types of checkpoints. 
@@ -59,92 +60,103 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker * (InitialPositionInStream.TRIM_HORIZON) or * the tip of the stream (InitialPositionInStream.LATEST). * @param storageLevel Storage level to use for storing the received objects - * - * @return ReceiverInputDStream[Array[Byte]] + * @param awsCredentialsOption Optional AWS credentials, used when user directly specifies + * the credentials */ private[kinesis] class KinesisReceiver( appName: String, streamName: String, endpointUrl: String, - checkpointInterval: Duration, + regionName: String, initialPositionInStream: InitialPositionInStream, - storageLevel: StorageLevel) - extends Receiver[Array[Byte]](storageLevel) with Logging { receiver => - - /* - * The following vars are built in the onStart() method which executes in the Spark Worker after - * this code is serialized and shipped remotely. - */ - - /* - * workerId should be based on the ip address of the actual Spark Worker where this code runs - * (not the Driver's ip address.) - */ - var workerId: String = null + checkpointInterval: Duration, + storageLevel: StorageLevel, + awsCredentialsOption: Option[SerializableAWSCredentials] + ) extends Receiver[Array[Byte]](storageLevel) with Logging { receiver => /* - * This impl uses the DefaultAWSCredentialsProviderChain and searches for credentials - * in the following order of precedence: - * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY - * Java System Properties - aws.accessKeyId and aws.secretKey - * Credential profiles file at the default location (~/.aws/credentials) shared by all - * AWS SDKs and the AWS CLI - * Instance profile credentials delivered through the Amazon EC2 metadata service + * ================================================================================= + * The following vars are initialize in the onStart() method which executes in the + * Spark worker after this Receiver is serialized and shipped to the worker. + * ================================================================================= */ - var credentialsProvider: AWSCredentialsProvider = null - - /* KCL config instance. */ - var kinesisClientLibConfiguration: KinesisClientLibConfiguration = null - /* - * RecordProcessorFactory creates impls of IRecordProcessor. - * IRecordProcessor adapts the KCL to our Spark KinesisReceiver via the - * IRecordProcessor.processRecords() method. - * We're using our custom KinesisRecordProcessor in this case. + /** + * workerId is used by the KCL should be based on the ip address of the actual Spark Worker where this code runs + * (not the driver's IP address.) */ - var recordProcessorFactory: IRecordProcessorFactory = null + private var workerId: String = null - /* - * Create a Kinesis Worker. - * This is the core client abstraction from the Kinesis Client Library (KCL). - * We pass the RecordProcessorFactory from above as well as the KCL config instance. - * A Kinesis Worker can process 1..* shards from the given stream - each with its - * own RecordProcessor. + /** + * Worker is the core client abstraction from the Kinesis Client Library (KCL). + * A worker can process more than one shards from the given stream. + * Each shard is assigned its own IRecordProcessor and the worker run multiple such + * processors. */ - var worker: Worker = null + private var worker: Worker = null /** - * This is called when the KinesisReceiver starts and must be non-blocking. - * The KCL creates and manages the receiving/processing thread pool through the Worker.run() - * method. 
+ * This is called when the KinesisReceiver starts and must be non-blocking. + * The KCL creates and manages the receiving/processing thread pool through Worker.run(). */ override def onStart() { workerId = Utils.localHostName() + ":" + UUID.randomUUID() - credentialsProvider = new DefaultAWSCredentialsProviderChain() - kinesisClientLibConfiguration = new KinesisClientLibConfiguration(appName, streamName, - credentialsProvider, workerId).withKinesisEndpoint(endpointUrl) - .withInitialPositionInStream(initialPositionInStream).withTaskBackoffTimeMillis(500) - recordProcessorFactory = new IRecordProcessorFactory { + + // KCL config instance + val awsCredProvider = resolveAWSCredentialsProvider() + val kinesisClientLibConfiguration = + new KinesisClientLibConfiguration(appName, streamName, awsCredProvider, workerId) + .withKinesisEndpoint(endpointUrl) + .withInitialPositionInStream(initialPositionInStream) + .withTaskBackoffTimeMillis(500) + .withRegionName(regionName) + + /* + * RecordProcessorFactory creates impls of IRecordProcessor. + * IRecordProcessor adapts the KCL to our Spark KinesisReceiver via the + * IRecordProcessor.processRecords() method. + * We're using our custom KinesisRecordProcessor in this case. + */ + val recordProcessorFactory = new IRecordProcessorFactory { override def createProcessor: IRecordProcessor = new KinesisRecordProcessor(receiver, workerId, new KinesisCheckpointState(checkpointInterval)) } + worker = new Worker(recordProcessorFactory, kinesisClientLibConfiguration) worker.run() + logInfo(s"Started receiver with workerId $workerId") } /** - * This is called when the KinesisReceiver stops. - * The KCL worker.shutdown() method stops the receiving/processing threads. - * The KCL will do its best to drain and checkpoint any in-flight records upon shutdown. + * This is called when the KinesisReceiver stops. + * The KCL worker.shutdown() method stops the receiving/processing threads. + * The KCL will do its best to drain and checkpoint any in-flight records upon shutdown. */ override def onStop() { - worker.shutdown() - logInfo(s"Shut down receiver with workerId $workerId") + if (worker != null) { + worker.shutdown() + logInfo(s"Stopped receiver for workerId $workerId") + worker = null + } workerId = null - credentialsProvider = null - kinesisClientLibConfiguration = null - recordProcessorFactory = null - worker = null + } + + /** + * If AWS credential is provided, return a AWSCredentialProvider returning that credential. + * Otherwise, return the DefaultAWSCredentialsProviderChain. 
+ */ + private def resolveAWSCredentialsProvider(): AWSCredentialsProvider = { + awsCredentialsOption match { + case Some(awsCredentials) => + logInfo("Using provided AWS credentials") + new AWSCredentialsProvider { + override def getCredentials: AWSCredentials = awsCredentials + override def refresh(): Unit = { } + } + case None => + logInfo("Using DefaultAWSCredentialsProviderChain") + new DefaultAWSCredentialsProviderChain() + } } } diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala index af8cd875b4541..f65e743c4e2a3 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -35,7 +35,10 @@ import com.amazonaws.services.kinesis.model.Record /** * Kinesis-specific implementation of the Kinesis Client Library (KCL) IRecordProcessor. * This implementation operates on the Array[Byte] from the KinesisReceiver. - * The Kinesis Worker creates an instance of this KinesisRecordProcessor upon startup. + * The Kinesis Worker creates an instance of this KinesisRecordProcessor for each + * shard in the Kinesis stream upon startup. This is normally done in separate threads, + * but the KCLs within the KinesisReceivers will balance themselves out if you create + * multiple Receivers. * * @param receiver Kinesis receiver * @param workerId for logging purposes @@ -47,8 +50,8 @@ private[kinesis] class KinesisRecordProcessor( workerId: String, checkpointState: KinesisCheckpointState) extends IRecordProcessor with Logging { - /* shardId to be populated during initialize() */ - var shardId: String = _ + // shardId to be populated during initialize() + private var shardId: String = _ /** * The Kinesis Client Library calls this method during IRecordProcessor initialization. @@ -56,8 +59,8 @@ private[kinesis] class KinesisRecordProcessor( * @param shardId assigned by the KCL to this particular RecordProcessor. */ override def initialize(shardId: String) { - logInfo(s"Initialize: Initializing workerId $workerId with shardId $shardId") this.shardId = shardId + logInfo(s"Initialized workerId $workerId with shardId $shardId") } /** @@ -73,12 +76,17 @@ private[kinesis] class KinesisRecordProcessor( if (!receiver.isStopped()) { try { /* - * Note: If we try to store the raw ByteBuffer from record.getData(), the Spark Streaming - * Receiver.store(ByteBuffer) attempts to deserialize the ByteBuffer using the - * internally-configured Spark serializer (kryo, etc). - * This is not desirable, so we instead store a raw Array[Byte] and decouple - * ourselves from Spark's internal serialization strategy. - */ + * Notes: + * 1) If we try to store the raw ByteBuffer from record.getData(), the Spark Streaming + * Receiver.store(ByteBuffer) attempts to deserialize the ByteBuffer using the + * internally-configured Spark serializer (kryo, etc). + * 2) This is not desirable, so we instead store a raw Array[Byte] and decouple + * ourselves from Spark's internal serialization strategy. + * 3) For performance, the BlockGenerator is asynchronously queuing elements within its + * memory before creating blocks. This prevents the small block scenario, but requires + * that you register callbacks to know when a block has been generated and stored + * (WAL is sufficient for storage) before can checkpoint back to the source. 
+ */ batch.foreach(record => receiver.store(record.getData().array())) logDebug(s"Stored: Worker $workerId stored ${batch.size} records for shardId $shardId") @@ -116,7 +124,7 @@ private[kinesis] class KinesisRecordProcessor( logError(s"Exception: WorkerId $workerId encountered and exception while storing " + " or checkpointing a batch for workerId $workerId and shardId $shardId.", e) - /* Rethrow the exception to the Kinesis Worker that is managing this RecordProcessor.*/ + /* Rethrow the exception to the Kinesis Worker that is managing this RecordProcessor. */ throw e } } @@ -190,7 +198,7 @@ private[kinesis] object KinesisRecordProcessor extends Logging { logError(s"Retryable Exception: Random backOffMillis=${backOffMillis}", e) retryRandom(expression, numRetriesLeft - 1, maxBackOffMillis) } - /* Throw: Shutdown has been requested by the Kinesis Client Library.*/ + /* Throw: Shutdown has been requested by the Kinesis Client Library. */ case _: ShutdownException => { logError(s"ShutdownException: Caught shutdown exception, skipping checkpoint.", e) throw e diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala index 96f4399accd3a..b114bcff92d0f 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala @@ -16,29 +16,75 @@ */ package org.apache.spark.streaming.kinesis -import org.apache.spark.annotation.Experimental +import com.amazonaws.regions.RegionUtils +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream + import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.Duration -import org.apache.spark.streaming.StreamingContext -import org.apache.spark.streaming.api.java.JavaReceiverInputDStream -import org.apache.spark.streaming.api.java.JavaStreamingContext +import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext} import org.apache.spark.streaming.dstream.ReceiverInputDStream - -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream +import org.apache.spark.streaming.{Duration, StreamingContext} -/** - * Helper class to create Amazon Kinesis Input Stream - * :: Experimental :: - */ -@Experimental object KinesisUtils { /** - * Create an InputDStream that pulls messages from a Kinesis stream. - * :: Experimental :: - * @param ssc StreamingContext object + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. + * + * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets the AWS credentials. 
+ * + * @param ssc StreamingContext object + * @param kinesisAppName Kinesis application name used by the Kinesis Client Library + * (KCL) to update DynamoDB + * @param streamName Kinesis stream name + * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Name of region used by the Kinesis Client Library (KCL) to update + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param storageLevel Storage level to use for storing the received objects. + * StorageLevel.MEMORY_AND_DISK_2 is recommended. + */ + def createStream( + ssc: StreamingContext, + kinesisAppName: String, + streamName: String, + endpointUrl: String, + regionName: String, + initialPositionInStream: InitialPositionInStream, + checkpointInterval: Duration, + storageLevel: StorageLevel + ): ReceiverInputDStream[Array[Byte]] = { + ssc.receiverStream( + new KinesisReceiver(kinesisAppName, streamName, endpointUrl, validateRegion(regionName), + initialPositionInStream, checkpointInterval, storageLevel, None)) + } + + /** + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. + * + * Note: + * The given AWS credentials will get saved in DStream checkpoints if checkpointing + * is enabled. Make sure that your checkpoint directory is secure. + * + * @param ssc StreamingContext object + * @param kinesisAppName Kinesis application name used by the Kinesis Client Library + * (KCL) to update DynamoDB * @param streamName Kinesis stream name * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Name of region used by the Kinesis Client Library (KCL) to update + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) + * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) + * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. * See the Kinesis Spark Streaming documentation for more * details on the different types of checkpoints. @@ -48,28 +94,84 @@ object KinesisUtils { * per Kinesis' limit of 24 hours * (InitialPositionInStream.TRIM_HORIZON) or * the tip of the stream (InitialPositionInStream.LATEST). - * @param storageLevel Storage level to use for storing the received objects + * @param storageLevel Storage level to use for storing the received objects. + * StorageLevel.MEMORY_AND_DISK_2 is recommended. 
+ */ + def createStream( + ssc: StreamingContext, + kinesisAppName: String, + streamName: String, + endpointUrl: String, + regionName: String, + initialPositionInStream: InitialPositionInStream, + checkpointInterval: Duration, + storageLevel: StorageLevel, + awsAccessKeyId: String, + awsSecretKey: String + ): ReceiverInputDStream[Array[Byte]] = { + ssc.receiverStream( + new KinesisReceiver(kinesisAppName, streamName, endpointUrl, validateRegion(regionName), + initialPositionInStream, checkpointInterval, storageLevel, + Some(SerializableAWSCredentials(awsAccessKeyId, awsSecretKey)))) + } + + /** + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * @return ReceiverInputDStream[Array[Byte]] + * Note: + * - The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets AWS credentials. + * - The region of the `endpointUrl` will be used for DynamoDB and CloudWatch. + * - The Kinesis application name used by the Kinesis Client Library (KCL) will be the app name in + * [[org.apache.spark.SparkConf]]. + * + * @param ssc Java StreamingContext object + * @param streamName Kinesis stream name + * @param endpointUrl Endpoint url of Kinesis service + * (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param storageLevel Storage level to use for storing the received objects + * StorageLevel.MEMORY_AND_DISK_2 is recommended. */ - @Experimental + @deprecated("use other forms of createStream", "1.4.0") def createStream( ssc: StreamingContext, streamName: String, endpointUrl: String, checkpointInterval: Duration, initialPositionInStream: InitialPositionInStream, - storageLevel: StorageLevel): ReceiverInputDStream[Array[Byte]] = { - ssc.receiverStream(new KinesisReceiver(ssc.sc.appName, streamName, endpointUrl, - checkpointInterval, initialPositionInStream, storageLevel)) + storageLevel: StorageLevel + ): ReceiverInputDStream[Array[Byte]] = { + ssc.receiverStream( + new KinesisReceiver(ssc.sc.appName, streamName, endpointUrl, getRegionByEndpoint(endpointUrl), + initialPositionInStream, checkpointInterval, storageLevel, None)) } /** - * Create a Java-friendly InputDStream that pulls messages from a Kinesis stream. - * :: Experimental :: + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. + * + * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets the AWS credentials. 
+ * * @param jssc Java StreamingContext object + * @param kinesisAppName Kinesis application name used by the Kinesis Client Library + * (KCL) to update DynamoDB * @param streamName Kinesis stream name * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Name of region used by the Kinesis Client Library (KCL) to update + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. * See the Kinesis Spark Streaming documentation for more * details on the different types of checkpoints. @@ -79,19 +181,116 @@ object KinesisUtils { * per Kinesis' limit of 24 hours * (InitialPositionInStream.TRIM_HORIZON) or * the tip of the stream (InitialPositionInStream.LATEST). - * @param storageLevel Storage level to use for storing the received objects + * @param storageLevel Storage level to use for storing the received objects. + * StorageLevel.MEMORY_AND_DISK_2 is recommended. + */ + def createStream( + jssc: JavaStreamingContext, + kinesisAppName: String, + streamName: String, + endpointUrl: String, + regionName: String, + initialPositionInStream: InitialPositionInStream, + checkpointInterval: Duration, + storageLevel: StorageLevel + ): JavaReceiverInputDStream[Array[Byte]] = { + createStream(jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, + initialPositionInStream, checkpointInterval, storageLevel) + } + + /** + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * @return JavaReceiverInputDStream[Array[Byte]] + * Note: + * The given AWS credentials will get saved in DStream checkpoints if checkpointing + * is enabled. Make sure that your checkpoint directory is secure. + * + * @param jssc Java StreamingContext object + * @param kinesisAppName Kinesis application name used by the Kinesis Client Library + * (KCL) to update DynamoDB + * @param streamName Kinesis stream name + * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Name of region used by the Kinesis Client Library (KCL) to update + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) + * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) + * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param storageLevel Storage level to use for storing the received objects. + * StorageLevel.MEMORY_AND_DISK_2 is recommended. 
*/ - @Experimental def createStream( - jssc: JavaStreamingContext, - streamName: String, - endpointUrl: String, + jssc: JavaStreamingContext, + kinesisAppName: String, + streamName: String, + endpointUrl: String, + regionName: String, + initialPositionInStream: InitialPositionInStream, + checkpointInterval: Duration, + storageLevel: StorageLevel, + awsAccessKeyId: String, + awsSecretKey: String + ): JavaReceiverInputDStream[Array[Byte]] = { + createStream(jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, + initialPositionInStream, checkpointInterval, storageLevel, awsAccessKeyId, awsSecretKey) + } + + /** + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. + * + * Note: + * - The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets AWS credentials. + * - The region of the `endpointUrl` will be used for DynamoDB and CloudWatch. + * - The Kinesis application name used by the Kinesis Client Library (KCL) will be the app name in + * [[org.apache.spark.SparkConf]]. + * + * @param jssc Java StreamingContext object + * @param streamName Kinesis stream name + * @param endpointUrl Endpoint url of Kinesis service + * (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param storageLevel Storage level to use for storing the received objects + * StorageLevel.MEMORY_AND_DISK_2 is recommended. 
+ */ + @deprecated("use other forms of createStream", "1.4.0") + def createStream( + jssc: JavaStreamingContext, + streamName: String, + endpointUrl: String, checkpointInterval: Duration, initialPositionInStream: InitialPositionInStream, - storageLevel: StorageLevel): JavaReceiverInputDStream[Array[Byte]] = { - jssc.receiverStream(new KinesisReceiver(jssc.ssc.sc.appName, streamName, - endpointUrl, checkpointInterval, initialPositionInStream, storageLevel)) + storageLevel: StorageLevel + ): JavaReceiverInputDStream[Array[Byte]] = { + createStream( + jssc.ssc, streamName, endpointUrl, checkpointInterval, initialPositionInStream, storageLevel) + } + + private def getRegionByEndpoint(endpointUrl: String): String = { + RegionUtils.getRegionByEndpoint(endpointUrl).getName() + } + + private def validateRegion(regionName: String): String = { + Option(RegionUtils.getRegion(regionName)).map { _.getName }.getOrElse { + throw new IllegalArgumentException(s"Region name '$regionName' is not valid") + } } } diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index 255fe65819608..7c17ee9dceddd 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -40,6 +40,7 @@ import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorC import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason import com.amazonaws.services.kinesis.model.Record +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain /** * Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor @@ -81,12 +82,20 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft checkpointStateMock, currentClockMock) } - test("kinesis utils api") { + test("KinesisUtils API") { val ssc = new StreamingContext(master, framework, batchDuration) // Tests the API, does not actually test data receiving - val kinesisStream = KinesisUtils.createStream(ssc, "mySparkStream", + val kinesisStream1 = KinesisUtils.createStream(ssc, "mySparkStream", "https://kinesis.us-west-2.amazonaws.com", Seconds(2), - InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2); + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2) + val kinesisStream2 = KinesisUtils.createStream(ssc, "myAppNam", "mySparkStream", + "https://kinesis.us-west-2.amazonaws.com", "us-west-2", + InitialPositionInStream.LATEST, Seconds(2), StorageLevel.MEMORY_AND_DISK_2) + val kinesisStream3 = KinesisUtils.createStream(ssc, "myAppNam", "mySparkStream", + "https://kinesis.us-west-2.amazonaws.com", "us-west-2", + InitialPositionInStream.LATEST, Seconds(2), StorageLevel.MEMORY_AND_DISK_2, + "awsAccessKey", "awsSecretKey") + ssc.stop() } diff --git a/pom.xml b/pom.xml index 6768a039d11e0..6f525b6ac81a3 100644 --- a/pom.xml +++ b/pom.xml @@ -148,8 +148,8 @@ <avro.version>1.7.7</avro.version> <avro.mapred.classifier>hadoop2</avro.mapred.classifier> <jets3t.version>0.7.1</jets3t.version> - <aws.java.sdk.version>1.8.3</aws.java.sdk.version> - <aws.kinesis.client.version>1.1.0</aws.kinesis.client.version> + <aws.java.sdk.version>1.9.16</aws.java.sdk.version> + <aws.kinesis.client.version>1.2.1</aws.kinesis.client.version> 
<commons.httpclient.version>4.3.2</commons.httpclient.version> <commons.math3.version>3.4.1</commons.math3.version> <test_classpath_file>${project.build.directory}/spark-test-classpath.txt</test_classpath_file> From 2f22424e9f6624097b292cb70e00787b69d80718 Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Sun, 17 May 2015 16:51:57 -0700 Subject: [PATCH 224/320] [SQL] [MINOR] use catalyst type converter in ScalaUdf It's a follow-up of https://github.com/apache/spark/pull/5154: we can speed up Scala UDF evaluation by creating the type converter in advance. Author: Wenchen Fan <cloud0fan@outlook.com> Closes #6182 from cloud-fan/tmp and squashes the following commits: 241cfe9 [Wenchen Fan] use converter in ScalaUdf --- .../org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala index 9a77ca624ebe2..d22eb10ad399f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala @@ -956,7 +956,7 @@ case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expressi } // scalastyle:on - - override def eval(input: Row): Any = CatalystTypeConverters.convertToCatalyst(f(input), dataType) + val converter = CatalystTypeConverters.createToCatalystConverter(dataType) + override def eval(input: Row): Any = converter(f(input)) } From ff71d34e00b64d70f671f9bf3e63aec39cd525e5 Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Sun, 17 May 2015 20:37:19 -0700 Subject: [PATCH 225/320] [SPARK-7693][Core] Remove "import scala.concurrent.ExecutionContext.Implicits.global" We learned a lesson from SPARK-7655: Spark should avoid using `scala.concurrent.ExecutionContext.Implicits.global` because the user may submit blocking actions to `scala.concurrent.ExecutionContext.Implicits.global` and exhaust all threads in it. This could crash Spark. So Spark should always use its own thread pools for safety. This PR removes all usages of `scala.concurrent.ExecutionContext.Implicits.global` and replaces them with proper dedicated thread pools.
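As a rough sketch of the pattern (illustrative only, not code from this patch; it assumes a class living under the `org.apache.spark` package, since `ThreadUtils` is `private[spark]`), a component owns a bounded daemon thread pool, passes it explicitly to `Future`, and shuts it down when the component stops:

~~~scala
package org.apache.spark.example   // hypothetical package, needed only to access private[spark] ThreadUtils

import scala.concurrent.{ExecutionContext, Future}

import org.apache.spark.util.ThreadUtils

class BackgroundWorker {
  // A dedicated, bounded pool: blocking work submitted here cannot starve other Spark internals,
  // unlike work submitted to the shared ExecutionContext.Implicits.global.
  private val futureExecutionContext = ExecutionContext.fromExecutorService(
    ThreadUtils.newDaemonCachedThreadPool("background-worker-future", 128))

  def runInBackground(work: => Unit): Future[Unit] = Future {
    work   // potentially blocking action, confined to this component's own pool
  }(futureExecutionContext)

  def stop(): Unit = {
    // The pool is owned by this component, so it is shut down together with it.
    futureExecutionContext.shutdownNow()
  }
}
~~~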
Author: zsxwing <zsxwing@gmail.com> Closes #6223 from zsxwing/SPARK-7693 and squashes the following commits: a33ff06 [zsxwing] Decrease the max thread number from 1024 to 128 cf4b3fc [zsxwing] Remove "import scala.concurrent.ExecutionContext.Implicits.global" --- .../CoarseGrainedExecutorBackend.scala | 9 +++--- .../apache/spark/rdd/AsyncRDDActions.scala | 13 +++++++-- .../apache/spark/storage/BlockManager.scala | 17 ++++++++--- .../spark/storage/BlockManagerMaster.scala | 29 ++++++++++++------- .../execution/joins/BroadcastHashJoin.scala | 2 +- .../receiver/ReceiverSupervisor.scala | 14 ++++++--- 6 files changed, 58 insertions(+), 26 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index ed159dec4f998..f3a26f54a81fb 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -33,7 +33,7 @@ import org.apache.spark.deploy.worker.WorkerWatcher import org.apache.spark.scheduler.TaskDescription import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.serializer.SerializerInstance -import org.apache.spark.util.{SignalLogger, Utils} +import org.apache.spark.util.{ThreadUtils, SignalLogger, Utils} private[spark] class CoarseGrainedExecutorBackend( override val rpcEnv: RpcEnv, @@ -55,18 +55,19 @@ private[spark] class CoarseGrainedExecutorBackend( private[this] val ser: SerializerInstance = env.closureSerializer.newInstance() override def onStart() { - import scala.concurrent.ExecutionContext.Implicits.global logInfo("Connecting to driver: " + driverUrl) rpcEnv.asyncSetupEndpointRefByURI(driverUrl).flatMap { ref => + // This is a very fast action so we can use "ThreadUtils.sameThread" driver = Some(ref) ref.ask[RegisteredExecutor.type]( RegisterExecutor(executorId, self, hostPort, cores, extractLogUrls)) - } onComplete { + }(ThreadUtils.sameThread).onComplete { + // This is a very fast action so we can use "ThreadUtils.sameThread" case Success(msg) => Utils.tryLogNonFatalError { Option(self).foreach(_.send(msg)) // msg must be RegisteredExecutor } case Failure(e) => logError(s"Cannot register with driver: $driverUrl", e) - } + }(ThreadUtils.sameThread) } def extractLogUrls: Map[String, String] = { diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala index ec185340c3a2d..bbf1b83af0795 100644 --- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala @@ -19,8 +19,10 @@ package org.apache.spark.rdd import java.util.concurrent.atomic.AtomicLong +import org.apache.spark.util.ThreadUtils + import scala.collection.mutable.ArrayBuffer -import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.ExecutionContext import scala.reflect.ClassTag import org.apache.spark.{ComplexFutureAction, FutureAction, Logging} @@ -66,6 +68,8 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi val f = new ComplexFutureAction[Seq[T]] f.run { + // This is a blocking action so we should use "AsyncRDDActions.futureExecutionContext" which + // is a cached thread pool. 
val results = new ArrayBuffer[T](num) val totalParts = self.partitions.length var partsScanned = 0 @@ -101,7 +105,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi partsScanned += numPartsToTry } results.toSeq - } + }(AsyncRDDActions.futureExecutionContext) f } @@ -123,3 +127,8 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi (index, data) => Unit, Unit) } } + +private object AsyncRDDActions { + val futureExecutionContext = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("AsyncRDDActions-future", 128)) +} diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index cc794e5c90ffa..16d67cbfca80b 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -21,8 +21,7 @@ import java.io.{BufferedOutputStream, ByteArrayOutputStream, File, InputStream, import java.nio.{ByteBuffer, MappedByteBuffer} import scala.collection.mutable.{ArrayBuffer, HashMap} -import scala.concurrent.{Await, Future} -import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.{ExecutionContext, Await, Future} import scala.concurrent.duration._ import scala.util.Random @@ -77,6 +76,9 @@ private[spark] class BlockManager( private val blockInfo = new TimeStampedHashMap[BlockId, BlockInfo] + private val futureExecutionContext = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("block-manager-future", 128)) + // Actual storage of where blocks are kept private var externalBlockStoreInitialized = false private[spark] val memoryStore = new MemoryStore(this, maxMemory) @@ -266,11 +268,13 @@ private[spark] class BlockManager( asyncReregisterLock.synchronized { if (asyncReregisterTask == null) { asyncReregisterTask = Future[Unit] { + // This is a blocking action and should run in futureExecutionContext which is a cached + // thread pool reregister() asyncReregisterLock.synchronized { asyncReregisterTask = null } - } + }(futureExecutionContext) } } } @@ -744,7 +748,11 @@ private[spark] class BlockManager( case b: ByteBufferValues if putLevel.replication > 1 => // Duplicate doesn't copy the bytes, but just creates a wrapper val bufferView = b.buffer.duplicate() - Future { replicate(blockId, bufferView, putLevel) } + Future { + // This is a blocking action and should run in futureExecutionContext which is a cached + // thread pool + replicate(blockId, bufferView, putLevel) + }(futureExecutionContext) case _ => null } @@ -1218,6 +1226,7 @@ private[spark] class BlockManager( } metadataCleaner.cancel() broadcastCleaner.cancel() + futureExecutionContext.shutdownNow() logInfo("BlockManager stopped") } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index a85e1c7632973..abcad9438bf28 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -17,13 +17,14 @@ package org.apache.spark.storage +import scala.collection.Iterable +import scala.collection.generic.CanBuildFrom import scala.concurrent.{Await, Future} -import scala.concurrent.ExecutionContext.Implicits.global import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.{Logging, SparkConf, SparkException} import 
org.apache.spark.storage.BlockManagerMessages._ -import org.apache.spark.util.RpcUtils +import org.apache.spark.util.{ThreadUtils, RpcUtils} private[spark] class BlockManagerMaster( @@ -102,8 +103,8 @@ class BlockManagerMaster( val future = driverEndpoint.askWithRetry[Future[Seq[Int]]](RemoveRdd(rddId)) future.onFailure { case e: Exception => - logWarning(s"Failed to remove RDD $rddId - ${e.getMessage}}") - } + logWarning(s"Failed to remove RDD $rddId - ${e.getMessage}}", e) + }(ThreadUtils.sameThread) if (blocking) { Await.result(future, timeout) } @@ -114,8 +115,8 @@ class BlockManagerMaster( val future = driverEndpoint.askWithRetry[Future[Seq[Boolean]]](RemoveShuffle(shuffleId)) future.onFailure { case e: Exception => - logWarning(s"Failed to remove shuffle $shuffleId - ${e.getMessage}}") - } + logWarning(s"Failed to remove shuffle $shuffleId - ${e.getMessage}}", e) + }(ThreadUtils.sameThread) if (blocking) { Await.result(future, timeout) } @@ -128,8 +129,8 @@ class BlockManagerMaster( future.onFailure { case e: Exception => logWarning(s"Failed to remove broadcast $broadcastId" + - s" with removeFromMaster = $removeFromMaster - ${e.getMessage}}") - } + s" with removeFromMaster = $removeFromMaster - ${e.getMessage}}", e) + }(ThreadUtils.sameThread) if (blocking) { Await.result(future, timeout) } @@ -169,11 +170,17 @@ class BlockManagerMaster( val response = driverEndpoint. askWithRetry[Map[BlockManagerId, Future[Option[BlockStatus]]]](msg) val (blockManagerIds, futures) = response.unzip - val result = Await.result(Future.sequence(futures), timeout) - if (result == null) { + implicit val sameThread = ThreadUtils.sameThread + val cbf = + implicitly[ + CanBuildFrom[Iterable[Future[Option[BlockStatus]]], + Option[BlockStatus], + Iterable[Option[BlockStatus]]]] + val blockStatus = Await.result( + Future.sequence[Option[BlockStatus], Iterable](futures)(cbf, ThreadUtils.sameThread), timeout) + if (blockStatus == null) { throw new SparkException("BlockManager returned null for BlockStatus query: " + blockId) } - val blockStatus = result.asInstanceOf[Iterable[Option[BlockStatus]]] blockManagerIds.zip(blockStatus).flatMap { case (blockManagerId, status) => status.map { s => (blockManagerId, s) } }.toMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala index fe43fc4125c8e..b8b12be8756f9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala @@ -78,5 +78,5 @@ case class BroadcastHashJoin( object BroadcastHashJoin { private val broadcastHashJoinExecutionContext = ExecutionContext.fromExecutorService( - ThreadUtils.newDaemonCachedThreadPool("broadcast-hash-join", 1024)) + ThreadUtils.newDaemonCachedThreadPool("broadcast-hash-join", 128)) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala index 4943f29395d12..33be067ebdaf2 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala @@ -18,14 +18,14 @@ package org.apache.spark.streaming.receiver import java.nio.ByteBuffer +import java.util.concurrent.CountDownLatch import scala.collection.mutable.ArrayBuffer +import 
scala.concurrent._ import org.apache.spark.{Logging, SparkConf} import org.apache.spark.storage.StreamBlockId -import java.util.concurrent.CountDownLatch -import scala.concurrent._ -import ExecutionContext.Implicits.global +import org.apache.spark.util.ThreadUtils /** * Abstract class that is responsible for supervising a Receiver in the worker. @@ -46,6 +46,9 @@ private[streaming] abstract class ReceiverSupervisor( // Attach the executor to the receiver receiver.attachExecutor(this) + private val futureExecutionContext = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("receiver-supervisor-future", 128)) + /** Receiver id */ protected val streamId = receiver.streamId @@ -111,6 +114,7 @@ private[streaming] abstract class ReceiverSupervisor( stoppingError = error.orNull stopReceiver(message, error) onStop(message, error) + futureExecutionContext.shutdownNow() stopLatch.countDown() } @@ -150,6 +154,8 @@ private[streaming] abstract class ReceiverSupervisor( /** Restart receiver with delay */ def restartReceiver(message: String, error: Option[Throwable], delay: Int) { Future { + // This is a blocking action so we should use "futureExecutionContext" which is a cached + // thread pool. logWarning("Restarting receiver with delay " + delay + " ms: " + message, error.getOrElse(null)) stopReceiver("Restarting receiver with delay " + delay + "ms: " + message, error) @@ -158,7 +164,7 @@ private[streaming] abstract class ReceiverSupervisor( logInfo("Starting receiver again") startReceiver() logInfo("Receiver started again") - } + }(futureExecutionContext) } /** Check if receiver has been marked for stopping */ From 775e6f9909d4495cbc11c377508b43482d782742 Mon Sep 17 00:00:00 2001 From: Shuo Xiang <shuoxiangpub@gmail.com> Date: Sun, 17 May 2015 21:16:52 -0700 Subject: [PATCH 226/320] [SPARK-7694] [MLLIB] Use getOrElse for getting the threshold of LR model The `toString` method of `LogisticRegressionModel` calls `get` method on an Option (threshold) without a safeguard. In spark-shell, the following code `val model = algorithm.run(data).clearThreshold()` in lbfgs code will fail as `toString `method will be called right after `clearThreshold()` to show the results in the REPL. 
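A minimal self-contained sketch of the failure mode and the fix (illustrative only; this is not the MLlib code itself, and the names are hypothetical): calling `get` on a cleared `Option` throws `NoSuchElementException`, while `getOrElse` degrades gracefully:

~~~scala
// Toy stand-in for the threshold handling in LogisticRegressionModel.
class ToyModel {
  private var threshold: Option[Double] = Some(0.5)

  def clearThreshold(): this.type = { threshold = None; this }

  // Pre-fix behavior: blows up once the threshold has been cleared.
  def unsafeDescription: String = s"threshold = ${threshold.get}"

  // Post-fix behavior: falls back to the string "None" instead of failing.
  def safeDescription: String = s"threshold = ${threshold.getOrElse("None")}"
}

object ToyModelDemo extends App {
  val model = new ToyModel().clearThreshold()
  println(model.safeDescription)   // prints: threshold = None
  // model.unsafeDescription       // would throw java.util.NoSuchElementException: None.get
}
~~~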
Author: Shuo Xiang <shuoxiangpub@gmail.com> Closes #6224 from coderxiang/getorelse and squashes the following commits: d5f53c9 [Shuo Xiang] use getOrElse for getting the threshold of LR model 5f109b4 [Shuo Xiang] Merge remote-tracking branch 'upstream/master' c5c5bfe [Shuo Xiang] Merge remote-tracking branch 'upstream/master' 98804c9 [Shuo Xiang] fix bug in topBykey and update test --- .../apache/spark/mllib/classification/LogisticRegression.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index bd2e9079ce1ae..2df4d21e8cd55 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -163,7 +163,7 @@ class LogisticRegressionModel ( override protected def formatVersion: String = "1.0" override def toString: String = { - s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.get}" + s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}" } } From e32c0f69f38ad729e25c2d5f90eb73b4453f8279 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Mon, 18 May 2015 01:10:55 -0700 Subject: [PATCH 227/320] [SPARK-7299][SQL] Set precision and scale for Decimal according to JDBC metadata instead of returned BigDecimal JIRA: https://issues.apache.org/jira/browse/SPARK-7299 When connecting to an Oracle DB through JDBC, the precision and scale of the `BigDecimal` object returned by `ResultSet.getBigDecimal` are not correctly matched to the table schema reported by `ResultSetMetaData.getPrecision` and `ResultSetMetaData.getScale`. So if you insert a value like `19999` into a column with `NUMBER(12, 2)` type, you get back a `BigDecimal` object with scale 0, even though the dataframe schema has the correct type `DecimalType(12, 2)`. Thus, after you save the dataframe into a parquet file and then retrieve it, you will get the wrong result `199.99`. Because this is only reported to be problematic for JDBC connections to Oracle DB, it might be difficult to add a test case for it, but according to the user's test on JIRA, this change solves the problem. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #5833 from viirya/jdbc_decimal_precision and squashes the following commits: 69bc2b5 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into jdbc_decimal_precision 928f864 [Liang-Chi Hsieh] Add comments. 5f9da94 [Liang-Chi Hsieh] Set up Decimal's precision and scale according to table schema instead of returned BigDecimal.
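As a rough sketch of the idea (not the `JDBCRDD` code itself; it assumes the `org.apache.spark.sql.types.Decimal` API and uses hypothetical values): build the catalyst `Decimal` from the precision and scale reported by the JDBC metadata instead of trusting the scale of the `BigDecimal` the driver returns:

~~~scala
import java.math.BigDecimal

import org.apache.spark.sql.types.Decimal

// Hypothetical value: the driver hands back 19999 with scale 0 for a NUMBER(12, 2) column.
val fromDriver = new BigDecimal("19999")
assert(fromDriver.scale == 0)   // the column's declared scale of 2 is lost by the driver

// Precision and scale as they would be reported by ResultSetMetaData.getPrecision / getScale.
val (precision, scale) = (12, 2)

// Constructing the Decimal with the metadata keeps it consistent with DecimalType(12, 2),
// so the value round-trips through Parquet as 19999.00 rather than 199.99.
val catalystDecimal = Decimal(fromDriver, precision, scale)
println(catalystDecimal)   // expected to print 19999.00
~~~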
--- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 95935ba874a72..4189dfcf956c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -300,7 +300,7 @@ private[sql] class JDBCRDD( abstract class JDBCConversion case object BooleanConversion extends JDBCConversion case object DateConversion extends JDBCConversion - case object DecimalConversion extends JDBCConversion + case class DecimalConversion(precisionInfo: Option[(Int, Int)]) extends JDBCConversion case object DoubleConversion extends JDBCConversion case object FloatConversion extends JDBCConversion case object IntegerConversion extends JDBCConversion @@ -317,8 +317,8 @@ private[sql] class JDBCRDD( schema.fields.map(sf => sf.dataType match { case BooleanType => BooleanConversion case DateType => DateConversion - case DecimalType.Unlimited => DecimalConversion - case DecimalType.Fixed(d) => DecimalConversion + case DecimalType.Unlimited => DecimalConversion(None) + case DecimalType.Fixed(d) => DecimalConversion(Some(d)) case DoubleType => DoubleConversion case FloatType => FloatConversion case IntegerType => IntegerConversion @@ -375,7 +375,22 @@ private[sql] class JDBCRDD( } else { mutableRow.update(i, null) } - case DecimalConversion => + // When connecting with Oracle DB through JDBC, the precision and scale of BigDecimal + // object returned by ResultSet.getBigDecimal is not correctly matched to the table + // schema reported by ResultSetMetaData.getPrecision and ResultSetMetaData.getScale. + // If inserting values like 19999 into a column with NUMBER(12, 2) type, you get through + // a BigDecimal object with scale as 0. But the dataframe schema has correct type as + // DecimalType(12, 2). Thus, after saving the dataframe into parquet file and then + // retrieve it, you will get wrong result 199.99. + // So it is needed to set precision and scale for Decimal based on JDBC metadata. + case DecimalConversion(Some((p, s))) => + val decimalVal = rs.getBigDecimal(pos) + if (decimalVal == null) { + mutableRow.update(i, null) + } else { + mutableRow.update(i, Decimal(decimalVal, p, s)) + } + case DecimalConversion(None) => val decimalVal = rs.getBigDecimal(pos) if (decimalVal == null) { mutableRow.update(i, null) From 1ecfac6e387b0934bfb5a9bbb4ad74b81ec210a4 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Mon, 18 May 2015 08:35:14 -0700 Subject: [PATCH 228/320] [SPARK-6657] [PYSPARK] Fix doc warnings Fixed the following warnings in `make clean html` under `python/docs`: ~~~ /Users/meng/src/spark/python/pyspark/mllib/evaluation.py:docstring of pyspark.mllib.evaluation.RankingMetrics.ndcgAt:3: ERROR: Unexpected indentation. /Users/meng/src/spark/python/pyspark/mllib/evaluation.py:docstring of pyspark.mllib.evaluation.RankingMetrics.ndcgAt:4: WARNING: Block quote ends without a blank line; unexpected unindent. /Users/meng/src/spark/python/pyspark/mllib/fpm.py:docstring of pyspark.mllib.fpm.FPGrowth.train:3: ERROR: Unexpected indentation. /Users/meng/src/spark/python/pyspark/mllib/fpm.py:docstring of pyspark.mllib.fpm.FPGrowth.train:4: WARNING: Block quote ends without a blank line; unexpected unindent. 
/Users/meng/src/spark/python/pyspark/sql/__init__.py:docstring of pyspark.sql.DataFrame.replace:16: WARNING: Field list ends without a blank line; unexpected unindent. /Users/meng/src/spark/python/pyspark/streaming/kafka.py:docstring of pyspark.streaming.kafka.KafkaUtils.createRDD:8: ERROR: Unexpected indentation. /Users/meng/src/spark/python/pyspark/streaming/kafka.py:docstring of pyspark.streaming.kafka.KafkaUtils.createRDD:9: WARNING: Block quote ends without a blank line; unexpected unindent. ~~~ davies Author: Xiangrui Meng <meng@databricks.com> Closes #6221 from mengxr/SPARK-6657 and squashes the following commits: e3f83fe [Xiangrui Meng] fix sql and streaming doc warnings 2b4371e [Xiangrui Meng] fix mllib python doc warnings --- python/pyspark/mllib/evaluation.py | 5 ++--- python/pyspark/mllib/fpm.py | 12 ++++++------ python/pyspark/sql/dataframe.py | 1 + python/pyspark/streaming/kafka.py | 3 ++- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 4c777f2180dc9..a5e5ddc8fe506 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -334,11 +334,10 @@ def ndcgAt(self, k): """ Compute the average NDCG value of all the queries, truncated at ranking position k. The discounted cumulative gain at position k is computed as: - sum,,i=1,,^k^ (2^{relevance of ''i''th item}^ - 1) / log(i + 1), + sum,,i=1,,^k^ (2^{relevance of ''i''th item}^ - 1) / log(i + 1), and the NDCG is obtained by dividing the DCG value on the ground truth set. In the current implementation, the relevance value is binary. - - If a query has an empty ground truth set, zero will be used as ndcg together with + If a query has an empty ground truth set, zero will be used as NDCG together with a log warning. """ return self.call("ndcgAt", int(k)) diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index d8df02bdbaba9..bdc4a132b1b18 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -61,12 +61,12 @@ class FPGrowth(object): def train(cls, data, minSupport=0.3, numPartitions=-1): """ Computes an FP-Growth model that contains frequent itemsets. - :param data: The input data set, each element - contains a transaction. - :param minSupport: The minimal support level - (default: `0.3`). - :param numPartitions: The number of partitions used by parallel - FP-growth (default: same as input data). + + :param data: The input data set, each element contains a + transaction. + :param minSupport: The minimal support level (default: `0.3`). + :param numPartitions: The number of partitions used by + parallel FP-growth (default: same as input data). """ model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions)) return FPGrowthModel(model) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 96d927b9ba35c..e4a191a9ef07f 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -943,6 +943,7 @@ def replace(self, to_replace, value, subset=None): Columns specified in subset that do not have matching data type are ignored. For example, if `value` is a string, and subset contains a non-string column, then the non-string column is simply ignored. 
+ >>> df4.replace(10, 20).show() +----+------+-----+ | age|height| name| diff --git a/python/pyspark/streaming/kafka.py b/python/pyspark/streaming/kafka.py index e278b29003f69..10a859a532e28 100644 --- a/python/pyspark/streaming/kafka.py +++ b/python/pyspark/streaming/kafka.py @@ -132,11 +132,12 @@ def createRDD(sc, kafkaParams, offsetRanges, leaders={}, .. note:: Experimental Create a RDD from Kafka using offset ranges for each topic and partition. + :param sc: SparkContext object :param kafkaParams: Additional params for Kafka :param offsetRanges: list of offsetRange to specify topic:partition:[start, end) to consume :param leaders: Kafka brokers for each TopicAndPartition in offsetRanges. May be an empty - map, in which case leaders will be looked up on the driver. + map, in which case leaders will be looked up on the driver. :param keyDecoder: A function used to decode key (default is utf8_decoder) :param valueDecoder: A function used to decode value (default is utf8_decoder) :return: A RDD object From 814b3dabdf01abc7a2f25aa32284caccadeb7798 Mon Sep 17 00:00:00 2001 From: Vincenzo Selvaggio <vselvaggio@hotmail.it> Date: Mon, 18 May 2015 08:46:33 -0700 Subject: [PATCH 229/320] [SPARK-7272] [MLLIB] User guide for PMML model export https://issues.apache.org/jira/browse/SPARK-7272 Author: Vincenzo Selvaggio <vselvaggio@hotmail.it> Closes #6219 from selvinsource/mllib_pmml_model_export_SPARK-7272 and squashes the following commits: c866fb8 [Vincenzo Selvaggio] Update mllib-pmml-model-export.md 1beda98 [Vincenzo Selvaggio] [SPARK-7272] Initial user guide for pmml export d670662 [Vincenzo Selvaggio] Update mllib-pmml-model-export.md 2731375 [Vincenzo Selvaggio] Update mllib-pmml-model-export.md 680dc33 [Vincenzo Selvaggio] Update mllib-pmml-model-export.md 2e298b5 [Vincenzo Selvaggio] Update mllib-pmml-model-export.md a932f51 [Vincenzo Selvaggio] Create mllib-pmml-model-export.md --- docs/mllib-guide.md | 1 + docs/mllib-pmml-model-export.md | 86 +++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 docs/mllib-pmml-model-export.md diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md index f8e879496c135..de7d66fb2dedf 100644 --- a/docs/mllib-guide.md +++ b/docs/mllib-guide.md @@ -39,6 +39,7 @@ filtering, dimensionality reduction, as well as underlying optimization primitiv * [Optimization (developer)](mllib-optimization.html) * stochastic gradient descent * limited-memory BFGS (L-BFGS) +* [PMML model export](mllib-pmml-model-export.html) MLlib is under active development. The APIs marked `Experimental`/`DeveloperApi` may change in future releases, diff --git a/docs/mllib-pmml-model-export.md b/docs/mllib-pmml-model-export.md new file mode 100644 index 0000000000000..42ea2ca81f80d --- /dev/null +++ b/docs/mllib-pmml-model-export.md @@ -0,0 +1,86 @@ +--- +layout: global +title: PMML model export - MLlib +displayTitle: <a href="mllib-guide.html">MLlib</a> - PMML model export +--- + +* Table of contents +{:toc} + +## MLlib supported models + +MLlib supports model export to Predictive Model Markup Language ([PMML](http://en.wikipedia.org/wiki/Predictive_Model_Markup_Language)). + +The table below outlines the MLlib models that can be exported to PMML and their equivalent PMML model. 
+ +<table class="table"> + <thead> + <tr><th>MLlib model</th><th>PMML model</th></tr> + </thead> + <tbody> + <tr> + <td>KMeansModel</td><td>ClusteringModel</td> + </tr> + <tr> + <td>LinearRegressionModel</td><td>RegressionModel (functionName="regression")</td> + </tr> + <tr> + <td>RidgeRegressionModel</td><td>RegressionModel (functionName="regression")</td> + </tr> + <tr> + <td>LassoModel</td><td>RegressionModel (functionName="regression")</td> + </tr> + <tr> + <td>SVMModel</td><td>RegressionModel (functionName="classification" normalizationMethod="none")</td> + </tr> + <tr> + <td>Binary LogisticRegressionModel</td><td>RegressionModel (functionName="classification" normalizationMethod="logit")</td> + </tr> + </tbody> +</table> + +## Examples +<div class="codetabs"> + +<div data-lang="scala" markdown="1"> +To export a supported `model` (see table above) to PMML, simply call `model.toPMML`. + +Here a complete example of building a KMeansModel and print it out in PMML format: +{% highlight scala %} +import org.apache.spark.mllib.clustering.KMeans +import org.apache.spark.mllib.linalg.Vectors + +// Load and parse the data +val data = sc.textFile("data/mllib/kmeans_data.txt") +val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))).cache() + +// Cluster the data into two classes using KMeans +val numClusters = 2 +val numIterations = 20 +val clusters = KMeans.train(parsedData, numClusters, numIterations) + +// Export to PMML +println("PMML Model:\n" + clusters.toPMML) +{% endhighlight %} + +As well as exporting the PMML model to a String (`model.toPMML` as in the example above), you can export the PMML model to other formats: + +{% highlight scala %} +// Export the model to a String in PMML format +clusters.toPMML + +// Export the model to a local file in PMML format +clusters.toPMML("/tmp/kmeans.xml") + +// Export the model to a directory on a distributed file system in PMML format +clusters.toPMML(sc,"/tmp/kmeans") + +// Export the model to the OutputStream in PMML format +clusters.toPMML(System.out) +{% endhighlight %} + +For unsupported models, either you will not find a `.toPMML` method or an `IllegalArgumentException` will be thrown. + +</div> + +</div> From 563bfcc1ab1b1c79b1845230c8c600db85a08fe3 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Mon, 18 May 2015 10:59:35 -0700 Subject: [PATCH 230/320] [SPARK-7627] [SPARK-7472] DAG visualization: style skipped stages This patch fixes two things: **SPARK-7627.** Cached RDDs no longer light up on the job page. This is a simple fix. **SPARK-7472.** Display skipped stages differently from normal stages. The latter is a major UX issue. Because we link the job viz to the stage viz even for skipped stages, the user may inadvertently click into the stage page of a skipped stage, which is empty. 
------------------- <img src="https://cloud.githubusercontent.com/assets/2133137/7675241/de1a3da6-fcea-11e4-8101-88055cef78c5.png" width="300px" /> Author: Andrew Or <andrew@databricks.com> Closes #6171 from andrewor14/dag-viz-skipped and squashes the following commits: f261797 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-skipped 0eda358 [Andrew Or] Tweak skipped stage border color c604150 [Andrew Or] Tweak grayscale colors 7010676 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-skipped 762b541 [Andrew Or] Use special prefix for stage clusters to avoid collisions 51c95b9 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-skipped b928cd4 [Andrew Or] Fix potential leak + write tests for it 7c4c364 [Andrew Or] Show skipped stages differently 7cc34ce [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-skipped c121fa2 [Andrew Or] Fix cache color --- .../apache/spark/ui/static/spark-dag-viz.css | 71 +++--- .../apache/spark/ui/static/spark-dag-viz.js | 50 ++-- .../scala/org/apache/spark/ui/UIUtils.scala | 6 +- .../spark/ui/scope/RDDOperationGraph.scala | 10 +- .../ui/scope/RDDOperationGraphListener.scala | 96 ++++++-- .../RDDOperationGraphListenerSuite.scala | 227 ++++++++++++++---- 6 files changed, 352 insertions(+), 108 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css index eedefb44b96fc..3b4ae2ed354b8 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css @@ -15,32 +15,21 @@ * limitations under the License. */ -#dag-viz-graph svg path { - stroke: #444; - stroke-width: 1.5px; -} - -#dag-viz-graph svg g.cluster rect { - stroke-width: 1px; -} - -#dag-viz-graph svg g.node circle { - fill: #444; +#dag-viz-graph a, #dag-viz-graph a:hover { + text-decoration: none; } -#dag-viz-graph svg g.node rect { - fill: #C3EBFF; - stroke: #3EC0FF; - stroke-width: 1px; +#dag-viz-graph .label { + font-weight: normal; + text-shadow: none; } -#dag-viz-graph svg g.node.cached circle { - fill: #444; +#dag-viz-graph svg path { + stroke: #444; + stroke-width: 1.5px; } -#dag-viz-graph svg g.node.cached rect { - fill: #B3F5C5; - stroke: #56F578; +#dag-viz-graph svg g.cluster rect { stroke-width: 1px; } @@ -61,12 +50,23 @@ stroke-width: 1px; } -#dag-viz-graph svg.job g.cluster[class*="stage"] rect { +#dag-viz-graph svg.job g.cluster.skipped rect { + fill: #D6D6D6; + stroke: #B7B7B7; + stroke-width: 1px; +} + +#dag-viz-graph svg.job g.cluster.stage rect { fill: #FFFFFF; stroke: #FF99AC; stroke-width: 1px; } +#dag-viz-graph svg.job g.cluster.stage.skipped rect { + stroke: #ADADAD; + stroke-width: 1px; +} + #dag-viz-graph svg.job g#cross-stage-edges path { fill: none; } @@ -75,6 +75,20 @@ fill: #333; } +#dag-viz-graph svg.job g.cluster.skipped text { + fill: #666; +} + +#dag-viz-graph svg.job g.node circle { + fill: #444; +} + +#dag-viz-graph svg.job g.node.cached circle { + fill: #A3F545; + stroke: #52C366; + stroke-width: 2px; +} + /* Stage page specific styles */ #dag-viz-graph svg.stage g.cluster rect { @@ -83,7 +97,7 @@ stroke-width: 1px; } -#dag-viz-graph svg.stage g.cluster[class*="stage"] rect { +#dag-viz-graph svg.stage g.cluster.stage rect { fill: #FFFFFF; stroke: #FFA6B6; stroke-width: 1px; @@ -97,11 +111,14 @@ fill: #333; } -#dag-viz-graph a, #dag-viz-graph a:hover { - text-decoration: none; 
+#dag-viz-graph svg.stage g.node rect { + fill: #C3EBFF; + stroke: #3EC0FF; + stroke-width: 1px; } -#dag-viz-graph .label { - font-weight: normal; - text-shadow: none; +#dag-viz-graph svg.stage g.node.cached rect { + fill: #B3F5C5; + stroke: #52C366; + stroke-width: 2px; } diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index ee48fd29a6432..aaeba5b1027c9 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -57,9 +57,7 @@ var VizConstants = { stageSep: 40, graphPrefix: "graph_", nodePrefix: "node_", - stagePrefix: "stage_", - clusterPrefix: "cluster_", - stageClusterPrefix: "cluster_stage_" + clusterPrefix: "cluster_" }; var JobPageVizConstants = { @@ -133,9 +131,7 @@ function renderDagViz(forJob) { } // Render - var svg = graphContainer() - .append("svg") - .attr("class", jobOrStage); + var svg = graphContainer().append("svg").attr("class", jobOrStage); if (forJob) { renderDagVizForJob(svg); } else { @@ -185,23 +181,32 @@ function renderDagVizForJob(svgContainer) { var dot = metadata.select(".dot-file").text(); var stageId = metadata.attr("stage-id"); var containerId = VizConstants.graphPrefix + stageId; - // Link each graph to the corresponding stage page (TODO: handle stage attempts) - var stageLink = $("#stage-" + stageId.replace(VizConstants.stagePrefix, "") + "-0") - .find("a") - .attr("href") + "&expandDagViz=true"; - var container = svgContainer - .append("a") - .attr("xlink:href", stageLink) - .append("g") - .attr("id", containerId); + var isSkipped = metadata.attr("skipped") == "true"; + var container; + if (isSkipped) { + container = svgContainer + .append("g") + .attr("id", containerId) + .attr("skipped", "true"); + } else { + // Link each graph to the corresponding stage page (TODO: handle stage attempts) + // Use the link from the stage table so it also works for the history server + var attemptId = 0 + var stageLink = d3.select("#stage-" + stageId + "-" + attemptId) + .select("a") + .attr("href") + "&expandDagViz=true"; + container = svgContainer + .append("a") + .attr("xlink:href", stageLink) + .append("g") + .attr("id", containerId); + } // Now we need to shift the container for this stage so it doesn't overlap with // existing ones, taking into account the position and width of the last stage's // container. We do not need to do this for the first stage of this job. if (i > 0) { - var existingStages = svgContainer - .selectAll("g.cluster") - .filter("[class*=\"" + VizConstants.stageClusterPrefix + "\"]"); + var existingStages = svgContainer.selectAll("g.cluster.stage") if (!existingStages.empty()) { var lastStage = d3.select(existingStages[0].pop()); var lastStageWidth = toFloat(lastStage.select("rect").attr("width")); @@ -214,6 +219,12 @@ function renderDagVizForJob(svgContainer) { // Actually render the stage renderDot(dot, container, true); + // Mark elements as skipped if appropriate. Unfortunately we need to mark all + // elements instead of the parent container because of CSS override rules. 
+ if (isSkipped) { + container.selectAll("g").classed("skipped", true); + } + // Round corners on rectangles container .selectAll("rect") @@ -243,6 +254,9 @@ function renderDot(dot, container, forJob) { var renderer = new dagreD3.render(); preprocessGraphLayout(g, forJob); renderer(container, g); + + // Find the stage cluster and mark it for styling and post-processing + container.selectAll("g.cluster[name*=\"Stage\"]").classed("stage", true); } /* -------------------- * diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index ad16becde85dd..6194c50ec8c7c 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -352,10 +352,12 @@ private[spark] object UIUtils extends Logging { </a> </span> <div id="dag-viz-graph"></div> - <div id="dag-viz-metadata"> + <div id="dag-viz-metadata" style="display:none"> { graphs.map { g => - <div class="stage-metadata" stage-id={g.rootCluster.id} style="display:none"> + val stageId = g.rootCluster.id.replaceAll(RDDOperationGraph.STAGE_CLUSTER_PREFIX, "") + val skipped = g.rootCluster.name.contains("skipped").toString + <div class="stage-metadata" stage-id={stageId} skipped={skipped}> <div class="dot-file">{RDDOperationGraph.makeDotFile(g)}</div> { g.incomingEdges.map { e => <div class="incoming-edge">{e.fromId},{e.toId}</div> } } { g.outgoingEdges.map { e => <div class="outgoing-edge">{e.fromId},{e.toId}</div> } } diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala index 25d5c6ff7e9cd..33a7303be711c 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala @@ -52,10 +52,13 @@ private[ui] case class RDDOperationEdge(fromId: Int, toId: Int) * This represents any grouping of RDDs, including operation scopes (e.g. textFile, flatMap), * stages, jobs, or any higher level construct. A cluster may be nested inside of other clusters. */ -private[ui] class RDDOperationCluster(val id: String, val name: String) { +private[ui] class RDDOperationCluster(val id: String, private var _name: String) { private val _childNodes = new ListBuffer[RDDOperationNode] private val _childClusters = new ListBuffer[RDDOperationCluster] + def name: String = _name + def setName(n: String): Unit = { _name = n } + def childNodes: Seq[RDDOperationNode] = _childNodes.iterator.toSeq def childClusters: Seq[RDDOperationCluster] = _childClusters.iterator.toSeq def attachChildNode(childNode: RDDOperationNode): Unit = { _childNodes += childNode } @@ -71,6 +74,8 @@ private[ui] class RDDOperationCluster(val id: String, val name: String) { private[ui] object RDDOperationGraph extends Logging { + val STAGE_CLUSTER_PREFIX = "stage_" + /** * Construct a RDDOperationGraph for a given stage. 
* @@ -88,7 +93,8 @@ private[ui] object RDDOperationGraph extends Logging { val clusters = new mutable.HashMap[String, RDDOperationCluster] // indexed by cluster ID // Root cluster is the stage cluster - val stageClusterId = s"stage_${stage.stageId}" + // Use a special prefix here to differentiate this cluster from other operation clusters + val stageClusterId = STAGE_CLUSTER_PREFIX + stage.stageId val stageClusterName = s"Stage ${stage.stageId}" + { if (stage.attemptId == 0) "" else s" (attempt ${stage.attemptId})" } val rootCluster = new RDDOperationCluster(stageClusterId, stageClusterName) diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala index aa9c25cb5c8c6..89119cd3579ef 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala @@ -27,8 +27,15 @@ import org.apache.spark.ui.SparkUI * A SparkListener that constructs a DAG of RDD operations. */ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListener { + + // Note: the fate of jobs and stages are tied. This means when we clean up a job, + // we always clean up all of its stages. Similarly, when we clean up a stage, we + // always clean up its job (and, transitively, other stages in the same job). private[ui] val jobIdToStageIds = new mutable.HashMap[Int, Seq[Int]] + private[ui] val jobIdToSkippedStageIds = new mutable.HashMap[Int, Seq[Int]] + private[ui] val stageIdToJobId = new mutable.HashMap[Int, Int] private[ui] val stageIdToGraph = new mutable.HashMap[Int, RDDOperationGraph] + private[ui] val completedStageIds = new mutable.HashSet[Int] // Keep track of the order in which these are inserted so we can remove old ones private[ui] val jobIds = new mutable.ArrayBuffer[Int] @@ -40,16 +47,23 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen private val retainedStages = conf.getInt("spark.ui.retainedStages", SparkUI.DEFAULT_RETAINED_STAGES) - /** Return the graph metadata for the given stage, or None if no such information exists. */ + /** + * Return the graph metadata for all stages in the given job. + * An empty list is returned if one or more of its stages has been cleaned up. + */ def getOperationGraphForJob(jobId: Int): Seq[RDDOperationGraph] = synchronized { - val _stageIds = jobIdToStageIds.get(jobId).getOrElse { Seq.empty } - val graphs = _stageIds.flatMap { sid => stageIdToGraph.get(sid) } - // If the metadata for some stages have been removed, do not bother rendering this job - if (_stageIds.size != graphs.size) { - Seq.empty - } else { - graphs + val skippedStageIds = jobIdToSkippedStageIds.get(jobId).getOrElse(Seq.empty) + val graphs = jobIdToStageIds.get(jobId) + .getOrElse(Seq.empty) + .flatMap { sid => stageIdToGraph.get(sid) } + // Mark any skipped stages as such + graphs.foreach { g => + val stageId = g.rootCluster.id.replaceAll(RDDOperationGraph.STAGE_CLUSTER_PREFIX, "").toInt + if (skippedStageIds.contains(stageId) && !g.rootCluster.name.contains("skipped")) { + g.rootCluster.setName(g.rootCluster.name + " (skipped)") + } } + graphs } /** Return the graph metadata for the given stage, or None if no such information exists. 
*/ @@ -66,22 +80,68 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen jobIdToStageIds(jobId) = jobStart.stageInfos.map(_.stageId).sorted stageInfos.foreach { stageInfo => - stageIds += stageInfo.stageId - stageIdToGraph(stageInfo.stageId) = RDDOperationGraph.makeOperationGraph(stageInfo) - // Remove state for old stages - if (stageIds.size >= retainedStages) { - val toRemove = math.max(retainedStages / 10, 1) - stageIds.take(toRemove).foreach { id => stageIdToGraph.remove(id) } - stageIds.trimStart(toRemove) - } + val stageId = stageInfo.stageId + stageIds += stageId + stageIdToJobId(stageId) = jobId + stageIdToGraph(stageId) = RDDOperationGraph.makeOperationGraph(stageInfo) + trimStagesIfNecessary() + } + + trimJobsIfNecessary() + } + + /** Keep track of stages that have completed. */ + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = synchronized { + val stageId = stageCompleted.stageInfo.stageId + if (stageIdToJobId.contains(stageId)) { + // Note: Only do this if the stage has not already been cleaned up + // Otherwise, we may never clean this stage from `completedStageIds` + completedStageIds += stageCompleted.stageInfo.stageId + } + } + + /** On job end, find all stages in this job that are skipped and mark them as such. */ + override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = synchronized { + val jobId = jobEnd.jobId + jobIdToStageIds.get(jobId).foreach { stageIds => + val skippedStageIds = stageIds.filter { sid => !completedStageIds.contains(sid) } + // Note: Only do this if the job has not already been cleaned up + // Otherwise, we may never clean this job from `jobIdToSkippedStageIds` + jobIdToSkippedStageIds(jobId) = skippedStageIds } + } + + /** Clean metadata for old stages if we have exceeded the number to retain. */ + private def trimStagesIfNecessary(): Unit = { + if (stageIds.size >= retainedStages) { + val toRemove = math.max(retainedStages / 10, 1) + stageIds.take(toRemove).foreach { id => cleanStage(id) } + stageIds.trimStart(toRemove) + } + } - // Remove state for old jobs + /** Clean metadata for old jobs if we have exceeded the number to retain. */ + private def trimJobsIfNecessary(): Unit = { if (jobIds.size >= retainedJobs) { val toRemove = math.max(retainedJobs / 10, 1) - jobIds.take(toRemove).foreach { id => jobIdToStageIds.remove(id) } + jobIds.take(toRemove).foreach { id => cleanJob(id) } jobIds.trimStart(toRemove) } } + /** Clean metadata for the given stage, its job, and all other stages that belong to the job. */ + private[ui] def cleanStage(stageId: Int): Unit = { + completedStageIds.remove(stageId) + stageIdToGraph.remove(stageId) + stageIdToJobId.remove(stageId).foreach { jobId => cleanJob(jobId) } + } + + /** Clean metadata for the given job and all stages that belong to it. 
*/ + private[ui] def cleanJob(jobId: Int): Unit = { + jobIdToSkippedStageIds.remove(jobId) + jobIdToStageIds.remove(jobId).foreach { stageIds => + stageIds.foreach { stageId => cleanStage(stageId) } + } + } + } diff --git a/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala index c659fc1e8b9a9..c1126f3af52e6 100644 --- a/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala @@ -20,67 +20,212 @@ package org.apache.spark.ui.scope import org.scalatest.FunSuite import org.apache.spark.SparkConf -import org.apache.spark.scheduler.{SparkListenerJobStart, SparkListenerStageSubmitted, StageInfo} +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.SparkListenerStageSubmitted +import org.apache.spark.scheduler.SparkListenerStageCompleted +import org.apache.spark.scheduler.SparkListenerJobStart +/** + * Tests that this listener populates and cleans up its data structures properly. + */ class RDDOperationGraphListenerSuite extends FunSuite { private var jobIdCounter = 0 private var stageIdCounter = 0 + private val maxRetainedJobs = 10 + private val maxRetainedStages = 10 + private val conf = new SparkConf() + .set("spark.ui.retainedJobs", maxRetainedJobs.toString) + .set("spark.ui.retainedStages", maxRetainedStages.toString) - /** Run a job with the specified number of stages. */ - private def runOneJob(numStages: Int, listener: RDDOperationGraphListener): Unit = { - assert(numStages > 0, "I will not run a job with 0 stages for you.") - val stageInfos = (0 until numStages).map { _ => - val stageInfo = new StageInfo(stageIdCounter, 0, "s", 0, Seq.empty, Seq.empty, "d") - stageIdCounter += 1 - stageInfo - } - listener.onJobStart(new SparkListenerJobStart(jobIdCounter, 0, stageInfos)) - jobIdCounter += 1 - } - - test("listener cleans up metadata") { - - val conf = new SparkConf() - .set("spark.ui.retainedStages", "10") - .set("spark.ui.retainedJobs", "10") - + test("run normal jobs") { + val startingJobId = jobIdCounter + val startingStageId = stageIdCounter val listener = new RDDOperationGraphListener(conf) assert(listener.jobIdToStageIds.isEmpty) + assert(listener.jobIdToSkippedStageIds.isEmpty) + assert(listener.stageIdToJobId.isEmpty) assert(listener.stageIdToGraph.isEmpty) + assert(listener.completedStageIds.isEmpty) assert(listener.jobIds.isEmpty) assert(listener.stageIds.isEmpty) // Run a few jobs, but not enough for clean up yet - runOneJob(1, listener) - runOneJob(2, listener) - runOneJob(3, listener) + (1 to 3).foreach { numStages => startJob(numStages, listener) } // start 3 jobs and 6 stages + (0 to 5).foreach { i => endStage(startingStageId + i, listener) } // finish all 6 stages + (0 to 2).foreach { i => endJob(startingJobId + i, listener) } // finish all 3 jobs + assert(listener.jobIdToStageIds.size === 3) + assert(listener.jobIdToStageIds(startingJobId).size === 1) + assert(listener.jobIdToStageIds(startingJobId + 1).size === 2) + assert(listener.jobIdToStageIds(startingJobId + 2).size === 3) + assert(listener.jobIdToSkippedStageIds.size === 3) + assert(listener.jobIdToSkippedStageIds.values.forall(_.isEmpty)) // no skipped stages + assert(listener.stageIdToJobId.size === 6) + assert(listener.stageIdToJobId(startingStageId) === startingJobId) + assert(listener.stageIdToJobId(startingStageId + 1) === startingJobId + 1) + 
assert(listener.stageIdToJobId(startingStageId + 2) === startingJobId + 1) + assert(listener.stageIdToJobId(startingStageId + 3) === startingJobId + 2) + assert(listener.stageIdToJobId(startingStageId + 4) === startingJobId + 2) + assert(listener.stageIdToJobId(startingStageId + 5) === startingJobId + 2) assert(listener.stageIdToGraph.size === 6) + assert(listener.completedStageIds.size === 6) assert(listener.jobIds.size === 3) assert(listener.stageIds.size === 6) + } + + test("run jobs with skipped stages") { + val startingJobId = jobIdCounter + val startingStageId = stageIdCounter + val listener = new RDDOperationGraphListener(conf) + + // Run a few jobs, but not enough for clean up yet + // Leave some stages unfinished so that they are marked as skipped + (1 to 3).foreach { numStages => startJob(numStages, listener) } // start 3 jobs and 6 stages + (4 to 5).foreach { i => endStage(startingStageId + i, listener) } // finish only last 2 stages + (0 to 2).foreach { i => endJob(startingJobId + i, listener) } // finish all 3 jobs + + assert(listener.jobIdToSkippedStageIds.size === 3) + assert(listener.jobIdToSkippedStageIds(startingJobId).size === 1) + assert(listener.jobIdToSkippedStageIds(startingJobId + 1).size === 2) + assert(listener.jobIdToSkippedStageIds(startingJobId + 2).size === 1) // 2 stages not skipped + assert(listener.completedStageIds.size === 2) + + // The rest should be the same as before + assert(listener.jobIdToStageIds.size === 3) + assert(listener.jobIdToStageIds(startingJobId).size === 1) + assert(listener.jobIdToStageIds(startingJobId + 1).size === 2) + assert(listener.jobIdToStageIds(startingJobId + 2).size === 3) + assert(listener.stageIdToJobId.size === 6) + assert(listener.stageIdToJobId(startingStageId) === startingJobId) + assert(listener.stageIdToJobId(startingStageId + 1) === startingJobId + 1) + assert(listener.stageIdToJobId(startingStageId + 2) === startingJobId + 1) + assert(listener.stageIdToJobId(startingStageId + 3) === startingJobId + 2) + assert(listener.stageIdToJobId(startingStageId + 4) === startingJobId + 2) + assert(listener.stageIdToJobId(startingStageId + 5) === startingJobId + 2) + assert(listener.stageIdToGraph.size === 6) + assert(listener.jobIds.size === 3) + assert(listener.stageIds.size === 6) + } + + test("clean up metadata") { + val startingJobId = jobIdCounter + val startingStageId = stageIdCounter + val listener = new RDDOperationGraphListener(conf) - // Run a few more, but this time the stages should be cleaned up, but not the jobs - runOneJob(5, listener) - runOneJob(100, listener) - assert(listener.jobIdToStageIds.size === 5) - assert(listener.stageIdToGraph.size === 9) - assert(listener.jobIds.size === 5) - assert(listener.stageIds.size === 9) - - // Run a few more, but this time both jobs and stages should be cleaned up - (1 to 100).foreach { _ => - runOneJob(1, listener) + // Run many jobs and stages to trigger clean up + (1 to 10000).foreach { i => + // Note: this must be less than `maxRetainedStages` + val numStages = i % (maxRetainedStages - 2) + 1 + val startingStageIdForJob = stageIdCounter + val jobId = startJob(numStages, listener) + // End some, but not all, stages that belong to this job + // This is to ensure that we have both completed and skipped stages + (startingStageIdForJob until stageIdCounter) + .filter { i => i % 2 == 0 } + .foreach { i => endStage(i, listener) } + // End all jobs + endJob(jobId, listener) } - assert(listener.jobIdToStageIds.size === 9) - assert(listener.stageIdToGraph.size === 9) - 
assert(listener.jobIds.size === 9) - assert(listener.stageIds.size === 9) + + // Ensure we never exceed the max retained thresholds + assert(listener.jobIdToStageIds.size <= maxRetainedJobs) + assert(listener.jobIdToSkippedStageIds.size <= maxRetainedJobs) + assert(listener.stageIdToJobId.size <= maxRetainedStages) + assert(listener.stageIdToGraph.size <= maxRetainedStages) + assert(listener.completedStageIds.size <= maxRetainedStages) + assert(listener.jobIds.size <= maxRetainedJobs) + assert(listener.stageIds.size <= maxRetainedStages) + + // Also ensure we're actually populating these data structures + // Otherwise the previous group of asserts will be meaningless + assert(listener.jobIdToStageIds.nonEmpty) + assert(listener.jobIdToSkippedStageIds.nonEmpty) + assert(listener.stageIdToJobId.nonEmpty) + assert(listener.stageIdToGraph.nonEmpty) + assert(listener.completedStageIds.nonEmpty) + assert(listener.jobIds.nonEmpty) + assert(listener.stageIds.nonEmpty) // Ensure we clean up old jobs and stages, not arbitrary ones - assert(!listener.jobIdToStageIds.contains(0)) - assert(!listener.stageIdToGraph.contains(0)) - assert(!listener.stageIds.contains(0)) - assert(!listener.jobIds.contains(0)) + assert(!listener.jobIdToStageIds.contains(startingJobId)) + assert(!listener.jobIdToSkippedStageIds.contains(startingJobId)) + assert(!listener.stageIdToJobId.contains(startingStageId)) + assert(!listener.stageIdToGraph.contains(startingStageId)) + assert(!listener.completedStageIds.contains(startingStageId)) + assert(!listener.stageIds.contains(startingStageId)) + assert(!listener.jobIds.contains(startingJobId)) + } + + test("fate sharing between jobs and stages") { + val startingJobId = jobIdCounter + val startingStageId = stageIdCounter + val listener = new RDDOperationGraphListener(conf) + + // Run 3 jobs and 8 stages, finishing all 3 jobs but only 2 stages + startJob(5, listener) + startJob(1, listener) + startJob(2, listener) + (0 until 8).foreach { i => startStage(i + startingStageId, listener) } + endStage(startingStageId + 3, listener) + endStage(startingStageId + 4, listener) + (0 until 3).foreach { i => endJob(i + startingJobId, listener) } + + // First, assert the old stuff + assert(listener.jobIdToStageIds.size === 3) + assert(listener.jobIdToSkippedStageIds.size === 3) + assert(listener.stageIdToJobId.size === 8) + assert(listener.stageIdToGraph.size === 8) + assert(listener.completedStageIds.size === 2) + + // Cleaning the third job should clean all of its stages + listener.cleanJob(startingJobId + 2) + assert(listener.jobIdToStageIds.size === 2) + assert(listener.jobIdToSkippedStageIds.size === 2) + assert(listener.stageIdToJobId.size === 6) + assert(listener.stageIdToGraph.size === 6) + assert(listener.completedStageIds.size === 2) + + // Cleaning one of the stages in the first job should clean that job and all of its stages + // Note that we still keep around the last stage because it belongs to a different job + listener.cleanStage(startingStageId) + assert(listener.jobIdToStageIds.size === 1) + assert(listener.jobIdToSkippedStageIds.size === 1) + assert(listener.stageIdToJobId.size === 1) + assert(listener.stageIdToGraph.size === 1) + assert(listener.completedStageIds.size === 0) + } + + /** Start a job with the specified number of stages. 
*/ + private def startJob(numStages: Int, listener: RDDOperationGraphListener): Int = { + assert(numStages > 0, "I will not run a job with 0 stages for you.") + val stageInfos = (0 until numStages).map { _ => + val stageInfo = new StageInfo(stageIdCounter, 0, "s", 0, Seq.empty, Seq.empty, "d") + stageIdCounter += 1 + stageInfo + } + val jobId = jobIdCounter + listener.onJobStart(new SparkListenerJobStart(jobId, 0, stageInfos)) + // Also start all stages that belong to this job + stageInfos.map(_.stageId).foreach { sid => startStage(sid, listener) } + jobIdCounter += 1 + jobId + } + + /** Start the stage specified by the given ID. */ + private def startStage(stageId: Int, listener: RDDOperationGraphListener): Unit = { + val stageInfo = new StageInfo(stageId, 0, "s", 0, Seq.empty, Seq.empty, "d") + listener.onStageSubmitted(new SparkListenerStageSubmitted(stageInfo)) + } + + /** Finish the stage specified by the given ID. */ + private def endStage(stageId: Int, listener: RDDOperationGraphListener): Unit = { + val stageInfo = new StageInfo(stageId, 0, "s", 0, Seq.empty, Seq.empty, "d") + listener.onStageCompleted(new SparkListenerStageCompleted(stageInfo)) + } + + /** Finish the job specified by the given ID. */ + private def endJob(jobId: Int, listener: RDDOperationGraphListener): Unit = { + listener.onJobEnd(new SparkListenerJobEnd(jobId, 0, JobSucceeded)) } } From e1ac2a955be64b8df197195e3b225271cfa8201f Mon Sep 17 00:00:00 2001 From: Rene Treffer <treffer@measite.de> Date: Mon, 18 May 2015 11:55:36 -0700 Subject: [PATCH 231/320] [SPARK-6888] [SQL] Make the jdbc driver handling user-definable Replace the DriverQuirks with JdbcDialect(s) (and MySQLDialect/PostgresDialect) and allow developers to change the dialects on the fly (for new JDBCRRDs only). Some types (like an unsigned 64bit number) can be trivially mapped to java. The status quo is that the RRD will fail to load. This patch makes it possible to overwrite the type mapping to read e.g. 64Bit numbers as strings and handle them afterwards in software. JDBCSuite has an example that maps all types to String, which should always work (at the cost of extra code afterwards). As a side effect it should now be possible to develop simple dialects out-of-tree and even with spark-shell. Author: Rene Treffer <treffer@measite.de> Closes #5555 from rtreffer/jdbc-dialects and squashes the following commits: 3cbafd7 [Rene Treffer] [SPARK-6888] ignore classes belonging to changed API in MIMA report fe7e2e8 [Rene Treffer] [SPARK-6888] Make the jdbc driver handling user-definable --- project/MimaExcludes.scala | 8 + .../apache/spark/sql/jdbc/DriverQuirks.scala | 99 -------- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 11 +- .../apache/spark/sql/jdbc/JdbcDialects.scala | 211 ++++++++++++++++++ .../org/apache/spark/sql/jdbc/jdbc.scala | 43 ++-- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 49 ++++ 6 files changed, 295 insertions(+), 126 deletions(-) delete mode 100644 sql/core/src/main/scala/org/apache/spark/sql/jdbc/DriverQuirks.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 487062a31f77f..513bbaf98d804 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -137,6 +137,14 @@ object MimaExcludes { // implementing this interface in Java. Note that ShuffleWriter is private[spark]. 
ProblemFilters.exclude[IncompatibleTemplateDefProblem]( "org.apache.spark.shuffle.ShuffleWriter") + ) ++ Seq( + // SPARK-6888 make jdbc driver handling user definable + // This patch renames some classes to API friendly names. + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.DriverQuirks$"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.DriverQuirks"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.PostgresQuirks"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.NoQuirks"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.MySQLQuirks") ) case v if v.startsWith("1.3") => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DriverQuirks.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DriverQuirks.scala deleted file mode 100644 index 0feabc4282f4a..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DriverQuirks.scala +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.jdbc - -import org.apache.spark.sql.types._ - -import java.sql.Types - - -/** - * Encapsulates workarounds for the extensions, quirks, and bugs in various - * databases. Lots of databases define types that aren't explicitly supported - * by the JDBC spec. Some JDBC drivers also report inaccurate - * information---for instance, BIT(n>1) being reported as a BIT type is quite - * common, even though BIT in JDBC is meant for single-bit values. Also, there - * does not appear to be a standard name for an unbounded string or binary - * type; we use BLOB and CLOB by default but override with database-specific - * alternatives when these are absent or do not behave correctly. - * - * Currently, the only thing DriverQuirks does is handle type mapping. - * `getCatalystType` is used when reading from a JDBC table and `getJDBCType` - * is used when writing to a JDBC table. If `getCatalystType` returns `null`, - * the default type handling is used for the given JDBC type. Similarly, - * if `getJDBCType` returns `(null, None)`, the default type handling is used - * for the given Catalyst type. - */ -private[sql] abstract class DriverQuirks { - def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): DataType - def getJDBCType(dt: DataType): (String, Option[Int]) -} - -private[sql] object DriverQuirks { - /** - * Fetch the DriverQuirks class corresponding to a given database url. 
- */ - def get(url: String): DriverQuirks = { - if (url.startsWith("jdbc:mysql")) { - new MySQLQuirks() - } else if (url.startsWith("jdbc:postgresql")) { - new PostgresQuirks() - } else { - new NoQuirks() - } - } -} - -private[sql] class NoQuirks extends DriverQuirks { - def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): DataType = - null - def getJDBCType(dt: DataType): (String, Option[Int]) = (null, None) -} - -private[sql] class PostgresQuirks extends DriverQuirks { - def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): DataType = { - if (sqlType == Types.BIT && typeName.equals("bit") && size != 1) { - BinaryType - } else if (sqlType == Types.OTHER && typeName.equals("cidr")) { - StringType - } else if (sqlType == Types.OTHER && typeName.equals("inet")) { - StringType - } else null - } - - def getJDBCType(dt: DataType): (String, Option[Int]) = dt match { - case StringType => ("TEXT", Some(java.sql.Types.CHAR)) - case BinaryType => ("BYTEA", Some(java.sql.Types.BINARY)) - case BooleanType => ("BOOLEAN", Some(java.sql.Types.BOOLEAN)) - case _ => (null, None) - } -} - -private[sql] class MySQLQuirks extends DriverQuirks { - def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): DataType = { - if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) { - // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as - // byte arrays instead of longs. - md.putLong("binarylong", 1) - LongType - } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) { - BooleanType - } else null - } - def getJDBCType(dt: DataType): (String, Option[Int]) = (null, None) -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 4189dfcf956c0..f7b19096eaacb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -41,7 +41,7 @@ private[sql] object JDBCRDD extends Logging { /** * Maps a JDBC type to a Catalyst type. This function is called only when - * the DriverQuirks class corresponding to your database driver returns null. + * the JdbcDialect class corresponding to your database driver returns null. * * @param sqlType - A field of java.sql.Types * @return The Catalyst type corresponding to sqlType. @@ -51,7 +51,7 @@ private[sql] object JDBCRDD extends Logging { case java.sql.Types.ARRAY => null case java.sql.Types.BIGINT => LongType case java.sql.Types.BINARY => BinaryType - case java.sql.Types.BIT => BooleanType // Per JDBC; Quirks handles quirky drivers. + case java.sql.Types.BIT => BooleanType // @see JdbcDialect for quirks case java.sql.Types.BLOB => BinaryType case java.sql.Types.BOOLEAN => BooleanType case java.sql.Types.CHAR => StringType @@ -108,7 +108,7 @@ private[sql] object JDBCRDD extends Logging { * @throws SQLException if the table contains an unsupported type. 
*/ def resolveTable(url: String, table: String, properties: Properties): StructType = { - val quirks = DriverQuirks.get(url) + val dialect = JdbcDialects.get(url) val conn: Connection = DriverManager.getConnection(url, properties) try { val rs = conn.prepareStatement(s"SELECT * FROM $table WHERE 1=0").executeQuery() @@ -125,8 +125,9 @@ private[sql] object JDBCRDD extends Logging { val fieldScale = rsmd.getScale(i + 1) val nullable = rsmd.isNullable(i + 1) != ResultSetMetaData.columnNoNulls val metadata = new MetadataBuilder().putString("name", columnName) - var columnType = quirks.getCatalystType(dataType, typeName, fieldSize, metadata) - if (columnType == null) columnType = getCatalystType(dataType, fieldSize, fieldScale) + val columnType = + dialect.getCatalystType(dataType, typeName, fieldSize, metadata).getOrElse( + getCatalystType(dataType, fieldSize, fieldScale)) fields(i) = StructField(columnName, columnType, nullable, metadata.build()) i = i + 1 } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala new file mode 100644 index 0000000000000..6a169e106b968 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc + +import org.apache.spark.sql.types._ +import org.apache.spark.annotation.DeveloperApi + +import java.sql.Types + +/** + * :: DeveloperApi :: + * A database type definition coupled with the jdbc type needed to send null + * values to the database. + * @param databaseTypeDefinition The database type definition + * @param jdbcNullType The jdbc type (as defined in java.sql.Types) used to + * send a null value to the database. + */ +@DeveloperApi +case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int) + +/** + * :: DeveloperApi :: + * Encapsulates everything (extensions, workarounds, quirks) to handle the + * SQL dialect of a certain database or jdbc driver. + * Lots of databases define types that aren't explicitly supported + * by the JDBC spec. Some JDBC drivers also report inaccurate + * information---for instance, BIT(n>1) being reported as a BIT type is quite + * common, even though BIT in JDBC is meant for single-bit values. Also, there + * does not appear to be a standard name for an unbounded string or binary + * type; we use BLOB and CLOB by default but override with database-specific + * alternatives when these are absent or do not behave correctly. + * + * Currently, the only thing done by the dialect is type mapping. + * `getCatalystType` is used when reading from a JDBC table and `getJDBCType` + * is used when writing to a JDBC table. 
If `getCatalystType` returns `None`, + * the default type handling is used for the given JDBC type. Similarly, + * if `getJDBCType` returns `None`, the default type handling is used + * for the given Catalyst type. + */ +@DeveloperApi +abstract class JdbcDialect { + /** + * Check if this dialect instance can handle a certain jdbc url. + * @param url the jdbc url. + * @return True if the dialect can be applied on the given jdbc url. + * @throws NullPointerException if the url is null. + */ + def canHandle(url : String): Boolean + + /** + * Get the custom datatype mapping for the given jdbc meta information. + * @param sqlType The sql type (see java.sql.Types) + * @param typeName The sql type name (e.g. "BIGINT UNSIGNED") + * @param size The size of the type. + * @param md Result metadata associated with this type. + * @return The actual DataType (subclasses of [[org.apache.spark.sql.types.DataType]]) + * or None if the default type mapping should be used. + */ + def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = None + + /** + * Retrieve the jdbc / sql type for a given datatype. + * @param dt The datatype (e.g. [[org.apache.spark.sql.types.StringType]]) + * @return The new JdbcType if there is an override for this DataType + */ + def getJDBCType(dt: DataType): Option[JdbcType] = None +} + +/** + * :: DeveloperApi :: + * Registry of dialects that apply to every new jdbc [[org.apache.spark.sql.DataFrame]]. + * + * If multiple matching dialects are registered then all matching ones will be + * tried in reverse order. A user-added dialect will thus be applied first, + * overwriting the defaults. + * + * Note that all new dialects are applied to new jdbc DataFrames only. Make + * sure to register your dialects first. + */ +@DeveloperApi +object JdbcDialects { + + private var dialects = List[JdbcDialect]() + + /** + * Register a dialect for use on all new matching jdbc [[org.apache.spark.sql.DataFrame]]. + * Re-adding an existing dialect will cause a move-to-front. + * @param dialect The new dialect. + */ + def registerDialect(dialect: JdbcDialect) : Unit = { + dialects = dialect :: dialects.filterNot(_ == dialect) + } + + /** + * Unregister a dialect. Does nothing if the dialect is not registered. + * @param dialect The jdbc dialect. + */ + def unregisterDialect(dialect : JdbcDialect) : Unit = { + dialects = dialects.filterNot(_ == dialect) + } + + registerDialect(MySQLDialect) + registerDialect(PostgresDialect) + + /** + * Fetch the JdbcDialect class corresponding to a given database url. + */ + private[sql] def get(url: String): JdbcDialect = { + val matchingDialects = dialects.filter(_.canHandle(url)) + matchingDialects.length match { + case 0 => NoopDialect + case 1 => matchingDialects.head + case _ => new AggregatedDialect(matchingDialects) + } + } +} + +/** + * :: DeveloperApi :: + * AggregatedDialect can unify multiple dialects into one virtual Dialect. + * Dialects are tried in order, and the first dialect that does not return a + * neutral element will win. + * @param dialects List of dialects.
+ */ +@DeveloperApi +class AggregatedDialect(dialects: List[JdbcDialect]) extends JdbcDialect { + + require(!dialects.isEmpty) + + def canHandle(url : String): Boolean = + dialects.map(_.canHandle(url)).reduce(_ && _) + + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = + dialects.map(_.getCatalystType(sqlType, typeName, size, md)).flatten.headOption + + override def getJDBCType(dt: DataType): Option[JdbcType] = + dialects.map(_.getJDBCType(dt)).flatten.headOption + +} + +/** + * :: DeveloperApi :: + * NOOP dialect object, always returning the neutral element. + */ +@DeveloperApi +case object NoopDialect extends JdbcDialect { + def canHandle(url : String): Boolean = true +} + +/** + * :: DeveloperApi :: + * Default postgres dialect, mapping bit/cidr/inet on read and string/binary/boolean on write. + */ +@DeveloperApi +case object PostgresDialect extends JdbcDialect { + def canHandle(url: String): Boolean = url.startsWith("jdbc:postgresql") + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { + if (sqlType == Types.BIT && typeName.equals("bit") && size != 1) { + Some(BinaryType) + } else if (sqlType == Types.OTHER && typeName.equals("cidr")) { + Some(StringType) + } else if (sqlType == Types.OTHER && typeName.equals("inet")) { + Some(StringType) + } else None + } + + override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { + case StringType => Some(JdbcType("TEXT", java.sql.Types.CHAR)) + case BinaryType => Some(JdbcType("BYTEA", java.sql.Types.BINARY)) + case BooleanType => Some(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) + case _ => None + } +} + +/** + * :: DeveloperApi :: + * Default mysql dialect to read bit/bitsets correctly. + */ +@DeveloperApi +case object MySQLDialect extends JdbcDialect { + def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql") + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { + if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) { + // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as + // byte arrays instead of longs. 
+ md.putLong("binarylong", 1) + Some(LongType) + } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) { + Some(BooleanType) + } else None + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala index a61790b8472c8..f21dd29aca37f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala @@ -129,25 +129,26 @@ package object jdbc { */ def schemaString(df: DataFrame, url: String): String = { val sb = new StringBuilder() - val quirks = DriverQuirks.get(url) + val dialect = JdbcDialects.get(url) df.schema.fields foreach { field => { val name = field.name - var typ: String = quirks.getJDBCType(field.dataType)._1 - if (typ == null) typ = field.dataType match { - case IntegerType => "INTEGER" - case LongType => "BIGINT" - case DoubleType => "DOUBLE PRECISION" - case FloatType => "REAL" - case ShortType => "INTEGER" - case ByteType => "BYTE" - case BooleanType => "BIT(1)" - case StringType => "TEXT" - case BinaryType => "BLOB" - case TimestampType => "TIMESTAMP" - case DateType => "DATE" - case DecimalType.Unlimited => "DECIMAL(40,20)" - case _ => throw new IllegalArgumentException(s"Don't know how to save $field to JDBC") - } + val typ: String = + dialect.getJDBCType(field.dataType).map(_.databaseTypeDefinition).getOrElse( + field.dataType match { + case IntegerType => "INTEGER" + case LongType => "BIGINT" + case DoubleType => "DOUBLE PRECISION" + case FloatType => "REAL" + case ShortType => "INTEGER" + case ByteType => "BYTE" + case BooleanType => "BIT(1)" + case StringType => "TEXT" + case BinaryType => "BLOB" + case TimestampType => "TIMESTAMP" + case DateType => "DATE" + case DecimalType.Unlimited => "DECIMAL(40,20)" + case _ => throw new IllegalArgumentException(s"Don't know how to save $field to JDBC") + }) val nullable = if (field.nullable) "" else "NOT NULL" sb.append(s", $name $typ $nullable") }} @@ -162,10 +163,9 @@ package object jdbc { url: String, table: String, properties: Properties = new Properties()) { - val quirks = DriverQuirks.get(url) + val dialect = JdbcDialects.get(url) val nullTypes: Array[Int] = df.schema.fields.map { field => - val nullType: Option[Int] = quirks.getJDBCType(field.dataType)._2 - if (nullType.isEmpty) { + dialect.getJDBCType(field.dataType).map(_.jdbcNullType).getOrElse( field.dataType match { case IntegerType => java.sql.Types.INTEGER case LongType => java.sql.Types.BIGINT @@ -181,8 +181,7 @@ package object jdbc { case DecimalType.Unlimited => java.sql.Types.DECIMAL case _ => throw new IllegalArgumentException( s"Can't translate null value for field $field") - } - } else nullType.get + }) } val rddSchema = df.schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 5a7b6f0aac6f7..a8dddfb9b6858 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -35,6 +35,13 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { val testBytes = Array[Byte](99.toByte, 134.toByte, 135.toByte, 200.toByte, 205.toByte) + val testH2Dialect = new JdbcDialect { + def canHandle(url: String) : Boolean = url.startsWith("jdbc:h2") + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = + Some(StringType) + } + before { 
Class.forName("org.h2.Driver") // Extra properties that will be specified for our database. We need these to test @@ -353,4 +360,46 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { """.stripMargin.replaceAll("\n", " ")) } } + + test("Remap types via JdbcDialects") { + JdbcDialects.registerDialect(testH2Dialect) + val df = TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties) + assert(df.schema.filter( + _.dataType != org.apache.spark.sql.types.StringType + ).isEmpty) + val rows = df.collect() + assert(rows(0).get(0).isInstanceOf[String]) + assert(rows(0).get(1).isInstanceOf[String]) + JdbcDialects.unregisterDialect(testH2Dialect) + } + + test("Default jdbc dialect registration") { + assert(JdbcDialects.get("jdbc:mysql://127.0.0.1/db") == MySQLDialect) + assert(JdbcDialects.get("jdbc:postgresql://127.0.0.1/db") == PostgresDialect) + assert(JdbcDialects.get("test.invalid") == NoopDialect) + } + + test("Dialect unregister") { + JdbcDialects.registerDialect(testH2Dialect) + JdbcDialects.unregisterDialect(testH2Dialect) + assert(JdbcDialects.get(urlWithUserAndPass) == NoopDialect) + } + + test("Aggregated dialects") { + val agg = new AggregatedDialect(List(new JdbcDialect { + def canHandle(url: String) : Boolean = url.startsWith("jdbc:h2:") + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = + if (sqlType % 2 == 0) { + Some(LongType) + } else { + None + } + }, testH2Dialect)) + assert(agg.canHandle("jdbc:h2:xxx")) + assert(!agg.canHandle("jdbc:h2")) + assert(agg.getCatalystType(0,"",1,null) == Some(LongType)) + assert(agg.getCatalystType(1,"",1,null) == Some(StringType)) + } + } From 010a1c278037130a69dcc79427d2b0380a2c82d8 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Mon, 18 May 2015 11:59:44 -0700 Subject: [PATCH 232/320] [SPARK-7570] [SQL] Ignores _temporary during partition discovery <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/6091) <!-- Reviewable:end --> Author: Cheng Lian <lian@databricks.com> Closes #6091 from liancheng/spark-7570 and squashes the following commits: 8ff07e8 [Cheng Lian] Ignores _temporary during partition discovery --- .../spark/sql/sources/PartitioningUtils.scala | 15 ++++++--- .../ParquetPartitionDiscoverySuite.scala | 31 ++++++++++--------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala index d1f0cdab55f66..8f8138d6ebebc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala @@ -23,8 +23,7 @@ import java.math.{BigDecimal => JBigDecimal} import scala.collection.mutable.ArrayBuffer import scala.util.Try -import com.google.common.cache.{CacheBuilder, Cache} -import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.fs.Path import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} @@ -69,7 +68,7 @@ private[sql] object PartitioningUtils { private[sql] def parsePartitions( paths: Seq[Path], defaultPartitionName: String): PartitionSpec = { - val partitionValues = resolvePartitions(paths.map(parsePartition(_, defaultPartitionName))) + val partitionValues = 
resolvePartitions(paths.flatMap(parsePartition(_, defaultPartitionName))) val fields = { val (PartitionValues(columnNames, literals)) = partitionValues.head columnNames.zip(literals).map { case (name, Literal(_, dataType)) => @@ -103,13 +102,19 @@ private[sql] object PartitioningUtils { */ private[sql] def parsePartition( path: Path, - defaultPartitionName: String): PartitionValues = { + defaultPartitionName: String): Option[PartitionValues] = { val columns = ArrayBuffer.empty[(String, Literal)] // Old Hadoop versions don't have `Path.isRoot` var finished = path.getParent == null var chopped = path while (!finished) { + // Sometimes (e.g., when speculative task is enabled), temporary directories may be left + // uncleaned. Here we simply ignore them. + if (chopped.getName == "_temporary") { + return None + } + val maybeColumn = parsePartitionColumn(chopped.getName, defaultPartitionName) maybeColumn.foreach(columns += _) chopped = chopped.getParent @@ -117,7 +122,7 @@ private[sql] object PartitioningUtils { } val (columnNames, values) = columns.reverse.unzip - PartitionValues(columnNames, values) + Some(PartitionValues(columnNames, values)) } private def parsePartitionColumn( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala index 8079c460713da..1927114b8d58f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala @@ -54,44 +54,47 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { } test("parse partition") { - def check(path: String, expected: PartitionValues): Unit = { + def check(path: String, expected: Option[PartitionValues]): Unit = { assert(expected === parsePartition(new Path(path), defaultPartitionName)) } def checkThrows[T <: Throwable: Manifest](path: String, expected: String): Unit = { val message = intercept[T] { - parsePartition(new Path(path), defaultPartitionName) + parsePartition(new Path(path), defaultPartitionName).get }.getMessage assert(message.contains(expected)) } - check( - "file:///", + check("file:///", Some { PartitionValues( ArrayBuffer.empty[String], - ArrayBuffer.empty[Literal])) + ArrayBuffer.empty[Literal]) + }) - check( - "file://path/a=10", + check("file://path/a=10", Some { PartitionValues( ArrayBuffer("a"), - ArrayBuffer(Literal.create(10, IntegerType)))) + ArrayBuffer(Literal.create(10, IntegerType))) + }) - check( - "file://path/a=10/b=hello/c=1.5", + check("file://path/a=10/b=hello/c=1.5", Some { PartitionValues( ArrayBuffer("a", "b", "c"), ArrayBuffer( Literal.create(10, IntegerType), Literal.create("hello", StringType), - Literal.create(1.5, FloatType)))) + Literal.create(1.5, FloatType))) + }) - check( - "file://path/a=10/b_hello/c=1.5", + check("file://path/a=10/b_hello/c=1.5", Some { PartitionValues( ArrayBuffer("c"), - ArrayBuffer(Literal.create(1.5, FloatType)))) + ArrayBuffer(Literal.create(1.5, FloatType))) + }) + + check("file://path/a=10/_temporary/c=1.5", None) + check("file://path/a=10/c=1.5/_temporary", None) checkThrows[AssertionError]("file://path/=10", "Empty partition column name") checkThrows[AssertionError]("file://path/a=", "Empty partition column value") From 56ede88485cfca90974425fcb603b257be47229b Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Mon, 18 May 2015 12:01:30 -0700 Subject: [PATCH 
233/320] [SQL] [MINOR] [THIS] use private for internal field in ScalaUdf Author: Wenchen Fan <cloud0fan@outlook.com> Closes #6235 from cloud-fan/tmp and squashes the following commits: 8f16367 [Wenchen Fan] use private[this] --- .../apache/spark/sql/catalyst/expressions/ScalaUdf.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala index d22eb10ad399f..fe2873e0be34d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala @@ -55,9 +55,9 @@ case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expressi }.foreach(println) */ - - val f = children.size match { - case 0 => + + private[this] val f = children.size match { + case 0 => val func = function.asInstanceOf[() => Any] (input: Row) => { func() @@ -956,7 +956,7 @@ case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expressi } // scalastyle:on - val converter = CatalystTypeConverters.createToCatalystConverter(dataType) + private[this] val converter = CatalystTypeConverters.createToCatalystConverter(dataType) override def eval(input: Row): Any = converter(f(input)) } From 9c7e802a5a2b8cd3eb77642f84c54a8e976fc996 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Mon, 18 May 2015 12:02:18 -0700 Subject: [PATCH 234/320] [SPARK-7380] [MLLIB] pipeline stages should be copyable in Python This PR makes pipeline stages in Python copyable and hence simplifies some implementations. It also includes the following changes: 1. Rename `paramMap` and `defaultParamMap` to `_paramMap` and `_defaultParamMap`, respectively. 2. Accept a list of param maps in `fit`. 3. Use parent uid and name to identify param. jkbradley Author: Xiangrui Meng <meng@databricks.com> Author: Joseph K. Bradley <joseph@databricks.com> Closes #6088 from mengxr/SPARK-7380 and squashes the following commits: 413c463 [Xiangrui Meng] remove unnecessary doc 4159f35 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-7380 611c719 [Xiangrui Meng] fix python style 68862b8 [Xiangrui Meng] update _java_obj initialization 927ad19 [Xiangrui Meng] fix ml/tests.py 0138fc3 [Xiangrui Meng] update feature transformers and fix a bug in RegexTokenizer 9ca44fb [Xiangrui Meng] simplify Java wrappers and add tests c7d84ef [Xiangrui Meng] update ml/tests.py to test copy params 7e0d27f [Xiangrui Meng] merge master 46840fb [Xiangrui Meng] update wrappers b6db1ed [Xiangrui Meng] update all self.paramMap to self._paramMap 46cb6ed [Xiangrui Meng] merge master a163413 [Xiangrui Meng] fix style 1042e80 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-7380 9630eae [Xiangrui Meng] fix Identifiable._randomUID 13bd70a [Xiangrui Meng] update ml/tests.py 64a536c [Xiangrui Meng] use _fit/_transform/_evaluate to simplify the impl 02abf13 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into copyable-python 66ce18c [Joseph K. Bradley] some cleanups before sending to Xiangrui 7431272 [Joseph K. 
Bradley] Rebased with master --- .../apache/spark/ml/feature/Tokenizer.scala | 2 +- .../org/apache/spark/ml/param/params.scala | 7 +- .../apache/spark/ml/util/Identifiable.scala | 6 +- python/pyspark/ml/classification.py | 35 ++--- python/pyspark/ml/evaluation.py | 6 +- python/pyspark/ml/feature.py | 91 +++++++------ python/pyspark/ml/param/__init__.py | 118 ++++++++++++----- .../ml/param/_shared_params_code_gen.py | 2 +- python/pyspark/ml/param/shared.py | 42 +++--- python/pyspark/ml/pipeline.py | 109 +++++++++++---- python/pyspark/ml/recommendation.py | 25 ++-- python/pyspark/ml/regression.py | 30 +++-- python/pyspark/ml/tests.py | 105 ++++++++++----- python/pyspark/ml/tuning.py | 43 ++++-- python/pyspark/ml/util.py | 13 +- python/pyspark/ml/wrapper.py | 125 +++++++++++------- 16 files changed, 498 insertions(+), 261 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 36d9e17eca41b..3f7f4f96fc422 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -61,7 +61,7 @@ class RegexTokenizer(override val uid: String) * Default: 1, to avoid returning empty strings * @group param */ - val minTokenLength: IntParam = new IntParam(this, "minLength", "minimum token length (>= 0)", + val minTokenLength: IntParam = new IntParam(this, "minTokenLength", "minimum token length (>= 0)", ParamValidators.gtEq(0)) /** @group setParam */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 247e08be1bb15..c33b66d31cd4f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -483,16 +483,15 @@ trait Params extends Identifiable with Serializable { def copy(extra: ParamMap): Params = { val that = this.getClass.getConstructor(classOf[String]).newInstance(uid) copyValues(that, extra) - that } /** * Extracts the embedded default param values and user-supplied values, and then merges them with * extra values from input into a flat param map, where the latter value is used if there exist - * conflicts, i.e., with ordering: default param values < user-supplied values < extraParamMap. + * conflicts, i.e., with ordering: default param values < user-supplied values < extra. */ - final def extractParamMap(extraParamMap: ParamMap): ParamMap = { - defaultParamMap ++ paramMap ++ extraParamMap + final def extractParamMap(extra: ParamMap): ParamMap = { + defaultParamMap ++ paramMap ++ extra } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala index 146697680092c..ddd34a54503a6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala @@ -23,15 +23,17 @@ import java.util.UUID /** * Trait for an object with an immutable unique ID that identifies itself and its derivatives. */ -trait Identifiable { +private[spark] trait Identifiable { /** * An immutable unique ID for the object and its derivatives. */ val uid: String + + override def toString: String = uid } -object Identifiable { +private[spark] object Identifiable { /** * Returns a random UID that concatenates the given prefix, "_", and 12 random hex chars. 
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 1411d3fd9c56e..4e645519c47c7 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -55,7 +55,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti ... TypeError: Method setParams forces keyword arguments. """ - _java_class = "org.apache.spark.ml.classification.LogisticRegression" + # a placeholder to make it appear in the generated doc elasticNetParam = \ Param(Params._dummy(), "elasticNetParam", @@ -75,6 +75,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred threshold=0.5, probabilityCol="probability") """ super(LogisticRegression, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.classification.LogisticRegression", self.uid) #: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty # is an L2 penalty. For alpha = 1, it is an L1 penalty. self.elasticNetParam = \ @@ -111,7 +113,7 @@ def setElasticNetParam(self, value): """ Sets the value of :py:attr:`elasticNetParam`. """ - self.paramMap[self.elasticNetParam] = value + self._paramMap[self.elasticNetParam] = value return self def getElasticNetParam(self): @@ -124,7 +126,7 @@ def setFitIntercept(self, value): """ Sets the value of :py:attr:`fitIntercept`. """ - self.paramMap[self.fitIntercept] = value + self._paramMap[self.fitIntercept] = value return self def getFitIntercept(self): @@ -137,7 +139,7 @@ def setThreshold(self, value): """ Sets the value of :py:attr:`threshold`. """ - self.paramMap[self.threshold] = value + self._paramMap[self.threshold] = value return self def getThreshold(self): @@ -208,7 +210,6 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred 1.0 """ - _java_class = "org.apache.spark.ml.classification.DecisionTreeClassifier" # a placeholder to make it appear in the generated doc impurity = Param(Params._dummy(), "impurity", "Criterion used for information gain calculation (case-insensitive). " + @@ -224,6 +225,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini") """ super(DecisionTreeClassifier, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.classification.DecisionTreeClassifier", self.uid) #: param for Criterion used for information gain calculation (case-insensitive). self.impurity = \ Param(self, "impurity", @@ -256,7 +259,7 @@ def setImpurity(self, value): """ Sets the value of :py:attr:`impurity`. """ - self.paramMap[self.impurity] = value + self._paramMap[self.impurity] = value return self def getImpurity(self): @@ -299,7 +302,6 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred 1.0 """ - _java_class = "org.apache.spark.ml.classification.RandomForestClassifier" # a placeholder to make it appear in the generated doc impurity = Param(Params._dummy(), "impurity", "Criterion used for information gain calculation (case-insensitive). " + @@ -325,6 +327,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred numTrees=20, featureSubsetStrategy="auto", seed=42) """ super(RandomForestClassifier, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.classification.RandomForestClassifier", self.uid) #: param for Criterion used for information gain calculation (case-insensitive). 
self.impurity = \ Param(self, "impurity", @@ -370,7 +374,7 @@ def setImpurity(self, value): """ Sets the value of :py:attr:`impurity`. """ - self.paramMap[self.impurity] = value + self._paramMap[self.impurity] = value return self def getImpurity(self): @@ -383,7 +387,7 @@ def setSubsamplingRate(self, value): """ Sets the value of :py:attr:`subsamplingRate`. """ - self.paramMap[self.subsamplingRate] = value + self._paramMap[self.subsamplingRate] = value return self def getSubsamplingRate(self): @@ -396,7 +400,7 @@ def setNumTrees(self, value): """ Sets the value of :py:attr:`numTrees`. """ - self.paramMap[self.numTrees] = value + self._paramMap[self.numTrees] = value return self def getNumTrees(self): @@ -409,7 +413,7 @@ def setFeatureSubsetStrategy(self, value): """ Sets the value of :py:attr:`featureSubsetStrategy`. """ - self.paramMap[self.featureSubsetStrategy] = value + self._paramMap[self.featureSubsetStrategy] = value return self def getFeatureSubsetStrategy(self): @@ -452,7 +456,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol 1.0 """ - _java_class = "org.apache.spark.ml.classification.GBTClassifier" # a placeholder to make it appear in the generated doc lossType = Param(Params._dummy(), "lossType", "Loss function which GBT tries to minimize (case-insensitive). " + @@ -476,6 +479,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred lossType="logistic", maxIter=20, stepSize=0.1) """ super(GBTClassifier, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.classification.GBTClassifier", self.uid) #: param for Loss function which GBT tries to minimize (case-insensitive). self.lossType = Param(self, "lossType", "Loss function which GBT tries to minimize (case-insensitive). " + @@ -517,7 +522,7 @@ def setLossType(self, value): """ Sets the value of :py:attr:`lossType`. """ - self.paramMap[self.lossType] = value + self._paramMap[self.lossType] = value return self def getLossType(self): @@ -530,7 +535,7 @@ def setSubsamplingRate(self, value): """ Sets the value of :py:attr:`subsamplingRate`. """ - self.paramMap[self.subsamplingRate] = value + self._paramMap[self.subsamplingRate] = value return self def getSubsamplingRate(self): @@ -543,7 +548,7 @@ def setStepSize(self, value): """ Sets the value of :py:attr:`stepSize`. """ - self.paramMap[self.stepSize] = value + self._paramMap[self.stepSize] = value return self def getStepSize(self): diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index 02020ebff94c2..f4655c513cae7 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -42,8 +42,6 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction 0.83... 
""" - _java_class = "org.apache.spark.ml.evaluation.BinaryClassificationEvaluator" - # a placeholder to make it appear in the generated doc metricName = Param(Params._dummy(), "metricName", "metric name in evaluation (areaUnderROC|areaUnderPR)") @@ -56,6 +54,8 @@ def __init__(self, rawPredictionCol="rawPrediction", labelCol="label", metricName="areaUnderROC") """ super(BinaryClassificationEvaluator, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.evaluation.BinaryClassificationEvaluator", self.uid) #: param for metric name in evaluation (areaUnderROC|areaUnderPR) self.metricName = Param(self, "metricName", "metric name in evaluation (areaUnderROC|areaUnderPR)") @@ -68,7 +68,7 @@ def setMetricName(self, value): """ Sets the value of :py:attr:`metricName`. """ - self.paramMap[self.metricName] = value + self._paramMap[self.metricName] = value return self def getMetricName(self): diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 58e22190c7c3c..c8115cb5bcf63 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -43,7 +43,6 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): 1.0 """ - _java_class = "org.apache.spark.ml.feature.Binarizer" # a placeholder to make it appear in the generated doc threshold = Param(Params._dummy(), "threshold", "threshold in binary classification prediction, in range [0, 1]") @@ -54,6 +53,7 @@ def __init__(self, threshold=0.0, inputCol=None, outputCol=None): __init__(self, threshold=0.0, inputCol=None, outputCol=None) """ super(Binarizer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Binarizer", self.uid) self.threshold = Param(self, "threshold", "threshold in binary classification prediction, in range [0, 1]") self._setDefault(threshold=0.0) @@ -73,7 +73,7 @@ def setThreshold(self, value): """ Sets the value of :py:attr:`threshold`. """ - self.paramMap[self.threshold] = value + self._paramMap[self.threshold] = value return self def getThreshold(self): @@ -104,7 +104,6 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): 0.0 """ - _java_class = "org.apache.spark.ml.feature.Bucketizer" # a placeholder to make it appear in the generated doc splits = \ Param(Params._dummy(), "splits", @@ -121,6 +120,7 @@ def __init__(self, splits=None, inputCol=None, outputCol=None): __init__(self, splits=None, inputCol=None, outputCol=None) """ super(Bucketizer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Bucketizer", self.uid) #: param for Splitting points for mapping continuous features into buckets. With n+1 splits, # there are n buckets. A bucket defined by splits x,y holds values in the range [x,y) # except the last bucket, which also includes y. The splits should be strictly increasing. @@ -150,7 +150,7 @@ def setSplits(self, value): """ Sets the value of :py:attr:`splits`. 
""" - self.paramMap[self.splits] = value + self._paramMap[self.splits] = value return self def getSplits(self): @@ -177,14 +177,13 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): SparseVector(5, {2: 1.0, 3: 1.0, 4: 1.0}) """ - _java_class = "org.apache.spark.ml.feature.HashingTF" - @keyword_only def __init__(self, numFeatures=1 << 18, inputCol=None, outputCol=None): """ __init__(self, numFeatures=1 << 18, inputCol=None, outputCol=None) """ super(HashingTF, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.HashingTF", self.uid) self._setDefault(numFeatures=1 << 18) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -217,8 +216,6 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol): DenseVector([0.2877, 0.0]) """ - _java_class = "org.apache.spark.ml.feature.IDF" - # a placeholder to make it appear in the generated doc minDocFreq = Param(Params._dummy(), "minDocFreq", "minimum of documents in which a term should appear for filtering") @@ -229,6 +226,7 @@ def __init__(self, minDocFreq=0, inputCol=None, outputCol=None): __init__(self, minDocFreq=0, inputCol=None, outputCol=None) """ super(IDF, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IDF", self.uid) self.minDocFreq = Param(self, "minDocFreq", "minimum of documents in which a term should appear for filtering") self._setDefault(minDocFreq=0) @@ -248,7 +246,7 @@ def setMinDocFreq(self, value): """ Sets the value of :py:attr:`minDocFreq`. """ - self.paramMap[self.minDocFreq] = value + self._paramMap[self.minDocFreq] = value return self def getMinDocFreq(self): @@ -257,6 +255,9 @@ def getMinDocFreq(self): """ return self.getOrDefault(self.minDocFreq) + def _create_model(self, java_model): + return IDFModel(java_model) + class IDFModel(JavaModel): """ @@ -285,14 +286,13 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): # a placeholder to make it appear in the generated doc p = Param(Params._dummy(), "p", "the p norm value.") - _java_class = "org.apache.spark.ml.feature.Normalizer" - @keyword_only def __init__(self, p=2.0, inputCol=None, outputCol=None): """ __init__(self, p=2.0, inputCol=None, outputCol=None) """ super(Normalizer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Normalizer", self.uid) self.p = Param(self, "p", "the p norm value.") self._setDefault(p=2.0) kwargs = self.__init__._input_kwargs @@ -311,7 +311,7 @@ def setP(self, value): """ Sets the value of :py:attr:`p`. """ - self.paramMap[self.p] = value + self._paramMap[self.p] = value return self def getP(self): @@ -347,8 +347,6 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): SparseVector(3, {0: 1.0}) """ - _java_class = "org.apache.spark.ml.feature.OneHotEncoder" - # a placeholder to make it appear in the generated doc includeFirst = Param(Params._dummy(), "includeFirst", "include first category") @@ -358,6 +356,7 @@ def __init__(self, includeFirst=True, inputCol=None, outputCol=None): __init__(self, includeFirst=True, inputCol=None, outputCol=None) """ super(OneHotEncoder, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.OneHotEncoder", self.uid) self.includeFirst = Param(self, "includeFirst", "include first category") self._setDefault(includeFirst=True) kwargs = self.__init__._input_kwargs @@ -376,7 +375,7 @@ def setIncludeFirst(self, value): """ Sets the value of :py:attr:`includeFirst`. 
""" - self.paramMap[self.includeFirst] = value + self._paramMap[self.includeFirst] = value return self def getIncludeFirst(self): @@ -404,8 +403,6 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): DenseVector([0.5, 0.25, 2.0, 1.0, 4.0]) """ - _java_class = "org.apache.spark.ml.feature.PolynomialExpansion" - # a placeholder to make it appear in the generated doc degree = Param(Params._dummy(), "degree", "the polynomial degree to expand (>= 1)") @@ -415,6 +412,8 @@ def __init__(self, degree=2, inputCol=None, outputCol=None): __init__(self, degree=2, inputCol=None, outputCol=None) """ super(PolynomialExpansion, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.feature.PolynomialExpansion", self.uid) self.degree = Param(self, "degree", "the polynomial degree to expand (>= 1)") self._setDefault(degree=2) kwargs = self.__init__._input_kwargs @@ -433,7 +432,7 @@ def setDegree(self, value): """ Sets the value of :py:attr:`degree`. """ - self.paramMap[self.degree] = value + self._paramMap[self.degree] = value return self def getDegree(self): @@ -471,7 +470,6 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): TypeError: Method setParams forces keyword arguments. """ - _java_class = "org.apache.spark.ml.feature.RegexTokenizer" # a placeholder to make it appear in the generated doc minTokenLength = Param(Params._dummy(), "minTokenLength", "minimum token length (>= 0)") gaps = Param(Params._dummy(), "gaps", "Set regex to match gaps or tokens") @@ -485,7 +483,8 @@ def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+" inputCol=None, outputCol=None) """ super(RegexTokenizer, self).__init__() - self.minTokenLength = Param(self, "minLength", "minimum token length (>= 0)") + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RegexTokenizer", self.uid) + self.minTokenLength = Param(self, "minTokenLength", "minimum token length (>= 0)") self.gaps = Param(self, "gaps", "Set regex to match gaps or tokens") self.pattern = Param(self, "pattern", "regex pattern used for tokenizing") self._setDefault(minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+") @@ -507,7 +506,7 @@ def setMinTokenLength(self, value): """ Sets the value of :py:attr:`minTokenLength`. """ - self.paramMap[self.minTokenLength] = value + self._paramMap[self.minTokenLength] = value return self def getMinTokenLength(self): @@ -520,7 +519,7 @@ def setGaps(self, value): """ Sets the value of :py:attr:`gaps`. """ - self.paramMap[self.gaps] = value + self._paramMap[self.gaps] = value return self def getGaps(self): @@ -533,7 +532,7 @@ def setPattern(self, value): """ Sets the value of :py:attr:`pattern`. 
""" - self.paramMap[self.pattern] = value + self._paramMap[self.pattern] = value return self def getPattern(self): @@ -557,8 +556,6 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): DenseVector([1.4142]) """ - _java_class = "org.apache.spark.ml.feature.StandardScaler" - # a placeholder to make it appear in the generated doc withMean = Param(Params._dummy(), "withMean", "Center data with mean") withStd = Param(Params._dummy(), "withStd", "Scale to unit standard deviation") @@ -569,6 +566,7 @@ def __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None): __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None) """ super(StandardScaler, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StandardScaler", self.uid) self.withMean = Param(self, "withMean", "Center data with mean") self.withStd = Param(self, "withStd", "Scale to unit standard deviation") self._setDefault(withMean=False, withStd=True) @@ -588,7 +586,7 @@ def setWithMean(self, value): """ Sets the value of :py:attr:`withMean`. """ - self.paramMap[self.withMean] = value + self._paramMap[self.withMean] = value return self def getWithMean(self): @@ -601,7 +599,7 @@ def setWithStd(self, value): """ Sets the value of :py:attr:`withStd`. """ - self.paramMap[self.withStd] = value + self._paramMap[self.withStd] = value return self def getWithStd(self): @@ -610,6 +608,9 @@ def getWithStd(self): """ return self.getOrDefault(self.withStd) + def _create_model(self, java_model): + return StandardScalerModel(java_model) + class StandardScalerModel(JavaModel): """ @@ -633,14 +634,13 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol): [(0, 0.0), (1, 2.0), (2, 1.0), (3, 0.0), (4, 0.0), (5, 1.0)] """ - _java_class = "org.apache.spark.ml.feature.StringIndexer" - @keyword_only def __init__(self, inputCol=None, outputCol=None): """ __init__(self, inputCol=None, outputCol=None) """ super(StringIndexer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StringIndexer", self.uid) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -653,6 +653,9 @@ def setParams(self, inputCol=None, outputCol=None): kwargs = self.setParams._input_kwargs return self._set(**kwargs) + def _create_model(self, java_model): + return StringIndexerModel(java_model) + class StringIndexerModel(JavaModel): """ @@ -686,14 +689,13 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): TypeError: Method setParams forces keyword arguments. 
""" - _java_class = "org.apache.spark.ml.feature.Tokenizer" - @keyword_only def __init__(self, inputCol=None, outputCol=None): """ __init__(self, inputCol=None, outputCol=None) """ super(Tokenizer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Tokenizer", self.uid) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -723,14 +725,13 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): DenseVector([0.0, 1.0]) """ - _java_class = "org.apache.spark.ml.feature.VectorAssembler" - @keyword_only def __init__(self, inputCols=None, outputCol=None): """ __init__(self, inputCols=None, outputCol=None) """ super(VectorAssembler, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorAssembler", self.uid) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -797,7 +798,6 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): DenseVector([1.0, 0.0]) """ - _java_class = "org.apache.spark.ml.feature.VectorIndexer" # a placeholder to make it appear in the generated doc maxCategories = Param(Params._dummy(), "maxCategories", "Threshold for the number of values a categorical feature can take " + @@ -810,6 +810,7 @@ def __init__(self, maxCategories=20, inputCol=None, outputCol=None): __init__(self, maxCategories=20, inputCol=None, outputCol=None) """ super(VectorIndexer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorIndexer", self.uid) self.maxCategories = Param(self, "maxCategories", "Threshold for the number of values a categorical feature " + "can take (>= 2). If a feature is found to have " + @@ -831,7 +832,7 @@ def setMaxCategories(self, value): """ Sets the value of :py:attr:`maxCategories`. """ - self.paramMap[self.maxCategories] = value + self._paramMap[self.maxCategories] = value return self def getMaxCategories(self): @@ -840,6 +841,15 @@ def getMaxCategories(self): """ return self.getOrDefault(self.maxCategories) + def _create_model(self, java_model): + return VectorIndexerModel(java_model) + + +class VectorIndexerModel(JavaModel): + """ + Model fitted by VectorIndexer. + """ + @inherit_doc @ignore_unicode_prefix @@ -855,7 +865,6 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has DenseVector([-0.0422, -0.5138, -0.2546, 0.6885, 0.276]) """ - _java_class = "org.apache.spark.ml.feature.Word2Vec" # a placeholder to make it appear in the generated doc vectorSize = Param(Params._dummy(), "vectorSize", "the dimension of codes after transforming from words") @@ -873,6 +882,7 @@ def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, seed=42, inputCol=None, outputCol=None) """ super(Word2Vec, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid) self.vectorSize = Param(self, "vectorSize", "the dimension of codes after transforming from words") self.numPartitions = Param(self, "numPartitions", @@ -900,7 +910,7 @@ def setVectorSize(self, value): """ Sets the value of :py:attr:`vectorSize`. """ - self.paramMap[self.vectorSize] = value + self._paramMap[self.vectorSize] = value return self def getVectorSize(self): @@ -913,7 +923,7 @@ def setNumPartitions(self, value): """ Sets the value of :py:attr:`numPartitions`. """ - self.paramMap[self.numPartitions] = value + self._paramMap[self.numPartitions] = value return self def getNumPartitions(self): @@ -926,7 +936,7 @@ def setMinCount(self, value): """ Sets the value of :py:attr:`minCount`. 
""" - self.paramMap[self.minCount] = value + self._paramMap[self.minCount] = value return self def getMinCount(self): @@ -935,6 +945,9 @@ def getMinCount(self): """ return self.getOrDefault(self.minCount) + def _create_model(self, java_model): + return Word2VecModel(java_model) + class Word2VecModel(JavaModel): """ diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 49c20b4cf70cf..67fb6e3dc74fb 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -16,6 +16,7 @@ # from abc import ABCMeta +import copy from pyspark.ml.util import Identifiable @@ -29,9 +30,9 @@ class Param(object): """ def __init__(self, parent, name, doc): - if not isinstance(parent, Params): - raise TypeError("Parent must be a Params but got type %s." % type(parent)) - self.parent = parent + if not isinstance(parent, Identifiable): + raise TypeError("Parent must be an Identifiable but got type %s." % type(parent)) + self.parent = parent.uid self.name = str(name) self.doc = str(doc) @@ -41,6 +42,15 @@ def __str__(self): def __repr__(self): return "Param(parent=%r, name=%r, doc=%r)" % (self.parent, self.name, self.doc) + def __hash__(self): + return hash(str(self)) + + def __eq__(self, other): + if isinstance(other, Param): + return self.parent == other.parent and self.name == other.name + else: + return False + class Params(Identifiable): """ @@ -51,10 +61,13 @@ class Params(Identifiable): __metaclass__ = ABCMeta #: internal param map for user-supplied values param map - paramMap = {} + _paramMap = {} #: internal param map for default values - defaultParamMap = {} + _defaultParamMap = {} + + #: value returned by :py:func:`params` + _params = None @property def params(self): @@ -63,10 +76,12 @@ def params(self): uses :py:func:`dir` to get all attributes of type :py:class:`Param`. """ - return list(filter(lambda attr: isinstance(attr, Param), - [getattr(self, x) for x in dir(self) if x != "params"])) + if self._params is None: + self._params = list(filter(lambda attr: isinstance(attr, Param), + [getattr(self, x) for x in dir(self) if x != "params"])) + return self._params - def _explain(self, param): + def explainParam(self, param): """ Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string. @@ -74,10 +89,10 @@ def _explain(self, param): param = self._resolveParam(param) values = [] if self.isDefined(param): - if param in self.defaultParamMap: - values.append("default: %s" % self.defaultParamMap[param]) - if param in self.paramMap: - values.append("current: %s" % self.paramMap[param]) + if param in self._defaultParamMap: + values.append("default: %s" % self._defaultParamMap[param]) + if param in self._paramMap: + values.append("current: %s" % self._paramMap[param]) else: values.append("undefined") valueStr = "(" + ", ".join(values) + ")" @@ -88,7 +103,7 @@ def explainParams(self): Returns the documentation of all params with their optionally default values and user-supplied values. """ - return "\n".join([self._explain(param) for param in self.params]) + return "\n".join([self.explainParam(param) for param in self.params]) def getParam(self, paramName): """ @@ -105,56 +120,76 @@ def isSet(self, param): Checks whether a param is explicitly set by user. """ param = self._resolveParam(param) - return param in self.paramMap + return param in self._paramMap def hasDefault(self, param): """ Checks whether a param has a default value. 
""" param = self._resolveParam(param) - return param in self.defaultParamMap + return param in self._defaultParamMap def isDefined(self, param): """ - Checks whether a param is explicitly set by user or has a default value. + Checks whether a param is explicitly set by user or has + a default value. """ return self.isSet(param) or self.hasDefault(param) + def hasParam(self, paramName): + """ + Tests whether this instance contains a param with a given + (string) name. + """ + param = self._resolveParam(paramName) + return param in self.params + def getOrDefault(self, param): """ Gets the value of a param in the user-supplied param map or its default value. Raises an error if either is set. """ - if isinstance(param, Param): - if param in self.paramMap: - return self.paramMap[param] - else: - return self.defaultParamMap[param] - elif isinstance(param, str): - return self.getOrDefault(self.getParam(param)) + param = self._resolveParam(param) + if param in self._paramMap: + return self._paramMap[param] else: - raise KeyError("Cannot recognize %r as a param." % param) + return self._defaultParamMap[param] - def extractParamMap(self, extraParamMap={}): + def extractParamMap(self, extra={}): """ Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < - user-supplied values < extraParamMap. - :param extraParamMap: extra param values + user-supplied values < extra. + :param extra: extra param values :return: merged param map """ - paramMap = self.defaultParamMap.copy() - paramMap.update(self.paramMap) - paramMap.update(extraParamMap) + paramMap = self._defaultParamMap.copy() + paramMap.update(self._paramMap) + paramMap.update(extra) return paramMap + def copy(self, extra={}): + """ + Creates a copy of this instance with the same uid and some + extra params. The default implementation creates a + shallow copy using :py:func:`copy.copy`, and then copies the + embedded and extra parameters over and returns the copy. + Subclasses should override this method if the default approach + is not sufficient. + :param extra: Extra parameters to copy to the new instance + :return: Copy of this instance + """ + that = copy.copy(self) + that._paramMap = self.extractParamMap(extra) + return that + def _shouldOwn(self, param): """ Validates that the input param belongs to this Params instance. """ - if param.parent is not self: + if not (self.uid == param.parent and self.hasParam(param.name)): raise ValueError("Param %r does not belong to %r." % (param, self)) def _resolveParam(self, param): @@ -175,7 +210,8 @@ def _resolveParam(self, param): @staticmethod def _dummy(): """ - Returns a dummy Params instance used as a placeholder to generate docs. + Returns a dummy Params instance used as a placeholder to + generate docs. """ dummy = Params() dummy.uid = "undefined" @@ -186,7 +222,7 @@ def _set(self, **kwargs): Sets user-supplied params. """ for param, value in kwargs.items(): - self.paramMap[getattr(self, param)] = value + self._paramMap[getattr(self, param)] = value return self def _setDefault(self, **kwargs): @@ -194,5 +230,19 @@ def _setDefault(self, **kwargs): Sets default params. 
""" for param, value in kwargs.items(): - self.defaultParamMap[getattr(self, param)] = value + self._defaultParamMap[getattr(self, param)] = value return self + + def _copyValues(self, to, extra={}): + """ + Copies param values from this instance to another instance for + params shared by them. + :param to: the target instance + :param extra: extra params to be copied + :return: the target instance with param values copied + """ + paramMap = self.extractParamMap(extra) + for p in self.params: + if p in paramMap and to.hasParam(p.name): + to._set(**{p.name: paramMap[p]}) + return to diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 6fa9b8c2cf367..91e45ec373518 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -83,7 +83,7 @@ def set$Name(self, value): """ Sets the value of :py:attr:`$name`. """ - self.paramMap[self.$name] = value + self._paramMap[self.$name] = value return self def get$Name(self): diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index b116f05a068d3..a5dc9b7ef29ed 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -39,7 +39,7 @@ def setMaxIter(self, value): """ Sets the value of :py:attr:`maxIter`. """ - self.paramMap[self.maxIter] = value + self._paramMap[self.maxIter] = value return self def getMaxIter(self): @@ -68,7 +68,7 @@ def setRegParam(self, value): """ Sets the value of :py:attr:`regParam`. """ - self.paramMap[self.regParam] = value + self._paramMap[self.regParam] = value return self def getRegParam(self): @@ -97,7 +97,7 @@ def setFeaturesCol(self, value): """ Sets the value of :py:attr:`featuresCol`. """ - self.paramMap[self.featuresCol] = value + self._paramMap[self.featuresCol] = value return self def getFeaturesCol(self): @@ -126,7 +126,7 @@ def setLabelCol(self, value): """ Sets the value of :py:attr:`labelCol`. """ - self.paramMap[self.labelCol] = value + self._paramMap[self.labelCol] = value return self def getLabelCol(self): @@ -155,7 +155,7 @@ def setPredictionCol(self, value): """ Sets the value of :py:attr:`predictionCol`. """ - self.paramMap[self.predictionCol] = value + self._paramMap[self.predictionCol] = value return self def getPredictionCol(self): @@ -184,7 +184,7 @@ def setProbabilityCol(self, value): """ Sets the value of :py:attr:`probabilityCol`. """ - self.paramMap[self.probabilityCol] = value + self._paramMap[self.probabilityCol] = value return self def getProbabilityCol(self): @@ -213,7 +213,7 @@ def setRawPredictionCol(self, value): """ Sets the value of :py:attr:`rawPredictionCol`. """ - self.paramMap[self.rawPredictionCol] = value + self._paramMap[self.rawPredictionCol] = value return self def getRawPredictionCol(self): @@ -242,7 +242,7 @@ def setInputCol(self, value): """ Sets the value of :py:attr:`inputCol`. """ - self.paramMap[self.inputCol] = value + self._paramMap[self.inputCol] = value return self def getInputCol(self): @@ -271,7 +271,7 @@ def setInputCols(self, value): """ Sets the value of :py:attr:`inputCols`. """ - self.paramMap[self.inputCols] = value + self._paramMap[self.inputCols] = value return self def getInputCols(self): @@ -300,7 +300,7 @@ def setOutputCol(self, value): """ Sets the value of :py:attr:`outputCol`. 
""" - self.paramMap[self.outputCol] = value + self._paramMap[self.outputCol] = value return self def getOutputCol(self): @@ -329,7 +329,7 @@ def setNumFeatures(self, value): """ Sets the value of :py:attr:`numFeatures`. """ - self.paramMap[self.numFeatures] = value + self._paramMap[self.numFeatures] = value return self def getNumFeatures(self): @@ -358,7 +358,7 @@ def setCheckpointInterval(self, value): """ Sets the value of :py:attr:`checkpointInterval`. """ - self.paramMap[self.checkpointInterval] = value + self._paramMap[self.checkpointInterval] = value return self def getCheckpointInterval(self): @@ -387,7 +387,7 @@ def setSeed(self, value): """ Sets the value of :py:attr:`seed`. """ - self.paramMap[self.seed] = value + self._paramMap[self.seed] = value return self def getSeed(self): @@ -416,7 +416,7 @@ def setTol(self, value): """ Sets the value of :py:attr:`tol`. """ - self.paramMap[self.tol] = value + self._paramMap[self.tol] = value return self def getTol(self): @@ -445,7 +445,7 @@ def setStepSize(self, value): """ Sets the value of :py:attr:`stepSize`. """ - self.paramMap[self.stepSize] = value + self._paramMap[self.stepSize] = value return self def getStepSize(self): @@ -487,7 +487,7 @@ def setMaxDepth(self, value): """ Sets the value of :py:attr:`maxDepth`. """ - self.paramMap[self.maxDepth] = value + self._paramMap[self.maxDepth] = value return self def getMaxDepth(self): @@ -500,7 +500,7 @@ def setMaxBins(self, value): """ Sets the value of :py:attr:`maxBins`. """ - self.paramMap[self.maxBins] = value + self._paramMap[self.maxBins] = value return self def getMaxBins(self): @@ -513,7 +513,7 @@ def setMinInstancesPerNode(self, value): """ Sets the value of :py:attr:`minInstancesPerNode`. """ - self.paramMap[self.minInstancesPerNode] = value + self._paramMap[self.minInstancesPerNode] = value return self def getMinInstancesPerNode(self): @@ -526,7 +526,7 @@ def setMinInfoGain(self, value): """ Sets the value of :py:attr:`minInfoGain`. """ - self.paramMap[self.minInfoGain] = value + self._paramMap[self.minInfoGain] = value return self def getMinInfoGain(self): @@ -539,7 +539,7 @@ def setMaxMemoryInMB(self, value): """ Sets the value of :py:attr:`maxMemoryInMB`. """ - self.paramMap[self.maxMemoryInMB] = value + self._paramMap[self.maxMemoryInMB] = value return self def getMaxMemoryInMB(self): @@ -552,7 +552,7 @@ def setCacheNodeIds(self, value): """ Sets the value of :py:attr:`cacheNodeIds`. """ - self.paramMap[self.cacheNodeIds] = value + self._paramMap[self.cacheNodeIds] = value return self def getCacheNodeIds(self): diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index a328bcf84a2e7..0f38e021273b0 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -31,18 +31,40 @@ class Estimator(Params): __metaclass__ = ABCMeta @abstractmethod - def fit(self, dataset, params={}): + def _fit(self, dataset): """ - Fits a model to the input dataset with optional parameters. + Fits a model to the input dataset. This is called by the + default implementation of fit. :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame` - :param params: an optional param map that overwrites embedded - params :returns: fitted model """ raise NotImplementedError() + def fit(self, dataset, params={}): + """ + Fits a model to the input dataset with optional parameters. 
+ + :param dataset: input dataset, which is an instance of + :py:class:`pyspark.sql.DataFrame` + :param params: an optional param map that overrides embedded + params. If a list/tuple of param maps is given, + this calls fit on each param map and returns a + list of models. + :returns: fitted model(s) + """ + if isinstance(params, (list, tuple)): + return [self.fit(dataset, paramMap) for paramMap in params] + elif isinstance(params, dict): + if params: + return self.copy(params)._fit(dataset) + else: + return self._fit(dataset) + else: + raise ValueError("Params must be either a param map or a list/tuple of param maps, " + "but got %s." % type(params)) + @inherit_doc class Transformer(Params): @@ -54,18 +76,34 @@ class Transformer(Params): __metaclass__ = ABCMeta @abstractmethod - def transform(self, dataset, params={}): + def _transform(self, dataset): """ Transforms the input dataset with optional parameters. :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame` - :param params: an optional param map that overwrites embedded - params :returns: transformed dataset """ raise NotImplementedError() + def transform(self, dataset, params={}): + """ + Transforms the input dataset with optional parameters. + + :param dataset: input dataset, which is an instance of + :py:class:`pyspark.sql.DataFrame` + :param params: an optional param map that overrides embedded + params. + :returns: transformed dataset + """ + if isinstance(params, dict): + if params: + return self.copy(params,)._transform(dataset) + else: + return self._transform(dataset) + else: + raise ValueError("Params must be either a param map but got %s." % type(params)) + @inherit_doc class Model(Transformer): @@ -113,15 +151,15 @@ def setStages(self, value): :param value: a list of transformers or estimators :return: the pipeline instance """ - self.paramMap[self.stages] = value + self._paramMap[self.stages] = value return self def getStages(self): """ Get pipeline stages. """ - if self.stages in self.paramMap: - return self.paramMap[self.stages] + if self.stages in self._paramMap: + return self._paramMap[self.stages] @keyword_only def setParams(self, stages=[]): @@ -132,9 +170,8 @@ def setParams(self, stages=[]): kwargs = self.setParams._input_kwargs return self._set(**kwargs) - def fit(self, dataset, params={}): - paramMap = self.extractParamMap(params) - stages = paramMap[self.stages] + def _fit(self, dataset): + stages = self.getStages() for stage in stages: if not (isinstance(stage, Estimator) or isinstance(stage, Transformer)): raise TypeError( @@ -148,16 +185,21 @@ def fit(self, dataset, params={}): if i <= indexOfLastEstimator: if isinstance(stage, Transformer): transformers.append(stage) - dataset = stage.transform(dataset, paramMap) + dataset = stage.transform(dataset) else: # must be an Estimator - model = stage.fit(dataset, paramMap) + model = stage.fit(dataset) transformers.append(model) if i < indexOfLastEstimator: - dataset = model.transform(dataset, paramMap) + dataset = model.transform(dataset) else: transformers.append(stage) return PipelineModel(transformers) + def copy(self, extra={}): + that = Params.copy(self, extra) + stages = [stage.copy(extra) for stage in that.getStages()] + return that.setStages(stages) + @inherit_doc class PipelineModel(Model): @@ -165,16 +207,19 @@ class PipelineModel(Model): Represents a compiled pipeline with transformers and fitted models. 
""" - def __init__(self, transformers): + def __init__(self, stages): super(PipelineModel, self).__init__() - self.transformers = transformers + self.stages = stages - def transform(self, dataset, params={}): - paramMap = self.extractParamMap(params) - for t in self.transformers: - dataset = t.transform(dataset, paramMap) + def _transform(self, dataset): + for t in self.stages: + dataset = t.transform(dataset) return dataset + def copy(self, extra={}): + stages = [stage.copy(extra) for stage in self.stages] + return PipelineModel(stages) + class Evaluator(Params): """ @@ -184,14 +229,30 @@ class Evaluator(Params): __metaclass__ = ABCMeta @abstractmethod - def evaluate(self, dataset, params={}): + def _evaluate(self, dataset): """ Evaluates the output. + :param dataset: a dataset that contains labels/observations and + predictions + :return: metric + """ + raise NotImplementedError() + + def evaluate(self, dataset, params={}): + """ + Evaluates the output with optional parameters. + :param dataset: a dataset that contains labels/observations and predictions :param params: an optional param map that overrides embedded params :return: metric """ - raise NotImplementedError() + if isinstance(params, dict): + if params: + return self.copy(params)._evaluate(dataset) + else: + return self._evaluate(dataset) + else: + raise ValueError("Params must be a param map but got %s." % type(params)) diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index b2439cbd96522..39c2527543774 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -74,7 +74,7 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha >>> predictions[2] Row(user=2, item=0, prediction=-1.15...) """ - _java_class = "org.apache.spark.ml.recommendation.ALS" + # a placeholder to make it appear in the generated doc rank = Param(Params._dummy(), "rank", "rank of the factorization") numUserBlocks = Param(Params._dummy(), "numUserBlocks", "number of user blocks") @@ -97,6 +97,7 @@ def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemB ratingCol="rating", nonnegative=false, checkpointInterval=10) """ super(ALS, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.recommendation.ALS", self.uid) self.rank = Param(self, "rank", "rank of the factorization") self.numUserBlocks = Param(self, "numUserBlocks", "number of user blocks") self.numItemBlocks = Param(self, "numItemBlocks", "number of item blocks") @@ -133,7 +134,7 @@ def setRank(self, value): """ Sets the value of :py:attr:`rank`. """ - self.paramMap[self.rank] = value + self._paramMap[self.rank] = value return self def getRank(self): @@ -146,7 +147,7 @@ def setNumUserBlocks(self, value): """ Sets the value of :py:attr:`numUserBlocks`. """ - self.paramMap[self.numUserBlocks] = value + self._paramMap[self.numUserBlocks] = value return self def getNumUserBlocks(self): @@ -159,7 +160,7 @@ def setNumItemBlocks(self, value): """ Sets the value of :py:attr:`numItemBlocks`. """ - self.paramMap[self.numItemBlocks] = value + self._paramMap[self.numItemBlocks] = value return self def getNumItemBlocks(self): @@ -172,14 +173,14 @@ def setNumBlocks(self, value): """ Sets both :py:attr:`numUserBlocks` and :py:attr:`numItemBlocks` to the specific value. 
""" - self.paramMap[self.numUserBlocks] = value - self.paramMap[self.numItemBlocks] = value + self._paramMap[self.numUserBlocks] = value + self._paramMap[self.numItemBlocks] = value def setImplicitPrefs(self, value): """ Sets the value of :py:attr:`implicitPrefs`. """ - self.paramMap[self.implicitPrefs] = value + self._paramMap[self.implicitPrefs] = value return self def getImplicitPrefs(self): @@ -192,7 +193,7 @@ def setAlpha(self, value): """ Sets the value of :py:attr:`alpha`. """ - self.paramMap[self.alpha] = value + self._paramMap[self.alpha] = value return self def getAlpha(self): @@ -205,7 +206,7 @@ def setUserCol(self, value): """ Sets the value of :py:attr:`userCol`. """ - self.paramMap[self.userCol] = value + self._paramMap[self.userCol] = value return self def getUserCol(self): @@ -218,7 +219,7 @@ def setItemCol(self, value): """ Sets the value of :py:attr:`itemCol`. """ - self.paramMap[self.itemCol] = value + self._paramMap[self.itemCol] = value return self def getItemCol(self): @@ -231,7 +232,7 @@ def setRatingCol(self, value): """ Sets the value of :py:attr:`ratingCol`. """ - self.paramMap[self.ratingCol] = value + self._paramMap[self.ratingCol] = value return self def getRatingCol(self): @@ -244,7 +245,7 @@ def setNonnegative(self, value): """ Sets the value of :py:attr:`nonnegative`. """ - self.paramMap[self.nonnegative] = value + self._paramMap[self.nonnegative] = value return self def getNonnegative(self): diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index ef77e19327188..ff809cdafdf51 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -62,7 +62,7 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction ... TypeError: Method setParams forces keyword arguments. """ - _java_class = "org.apache.spark.ml.regression.LinearRegression" + # a placeholder to make it appear in the generated doc elasticNetParam = \ Param(Params._dummy(), "elasticNetParam", @@ -77,6 +77,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6) """ super(LinearRegression, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.regression.LinearRegression", self.uid) #: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty # is an L2 penalty. For alpha = 1, it is an L1 penalty. self.elasticNetParam = \ @@ -105,7 +107,7 @@ def setElasticNetParam(self, value): """ Sets the value of :py:attr:`elasticNetParam`. """ - self.paramMap[self.elasticNetParam] = value + self._paramMap[self.elasticNetParam] = value return self def getElasticNetParam(self): @@ -178,7 +180,6 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi 1.0 """ - _java_class = "org.apache.spark.ml.regression.DecisionTreeRegressor" # a placeholder to make it appear in the generated doc impurity = Param(Params._dummy(), "impurity", "Criterion used for information gain calculation (case-insensitive). " + @@ -194,6 +195,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance") """ super(DecisionTreeRegressor, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.regression.DecisionTreeRegressor", self.uid) #: param for Criterion used for information gain calculation (case-insensitive). 
self.impurity = \ Param(self, "impurity", @@ -226,7 +229,7 @@ def setImpurity(self, value): """ Sets the value of :py:attr:`impurity`. """ - self.paramMap[self.impurity] = value + self._paramMap[self.impurity] = value return self def getImpurity(self): @@ -264,7 +267,6 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi 0.5 """ - _java_class = "org.apache.spark.ml.regression.RandomForestRegressor" # a placeholder to make it appear in the generated doc impurity = Param(Params._dummy(), "impurity", "Criterion used for information gain calculation (case-insensitive). " + @@ -290,6 +292,8 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred impurity="variance", numTrees=20, featureSubsetStrategy="auto", seed=42) """ super(RandomForestRegressor, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.regression.RandomForestRegressor", self.uid) #: param for Criterion used for information gain calculation (case-insensitive). self.impurity = \ Param(self, "impurity", @@ -335,7 +339,7 @@ def setImpurity(self, value): """ Sets the value of :py:attr:`impurity`. """ - self.paramMap[self.impurity] = value + self._paramMap[self.impurity] = value return self def getImpurity(self): @@ -348,7 +352,7 @@ def setSubsamplingRate(self, value): """ Sets the value of :py:attr:`subsamplingRate`. """ - self.paramMap[self.subsamplingRate] = value + self._paramMap[self.subsamplingRate] = value return self def getSubsamplingRate(self): @@ -361,7 +365,7 @@ def setNumTrees(self, value): """ Sets the value of :py:attr:`numTrees`. """ - self.paramMap[self.numTrees] = value + self._paramMap[self.numTrees] = value return self def getNumTrees(self): @@ -374,7 +378,7 @@ def setFeatureSubsetStrategy(self, value): """ Sets the value of :py:attr:`featureSubsetStrategy`. """ - self.paramMap[self.featureSubsetStrategy] = value + self._paramMap[self.featureSubsetStrategy] = value return self def getFeatureSubsetStrategy(self): @@ -412,7 +416,6 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, 1.0 """ - _java_class = "org.apache.spark.ml.regression.GBTRegressor" # a placeholder to make it appear in the generated doc lossType = Param(Params._dummy(), "lossType", "Loss function which GBT tries to minimize (case-insensitive). " + @@ -436,6 +439,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred lossType="squared", maxIter=20, stepSize=0.1) """ super(GBTRegressor, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid) #: param for Loss function which GBT tries to minimize (case-insensitive). self.lossType = Param(self, "lossType", "Loss function which GBT tries to minimize (case-insensitive). " + @@ -477,7 +481,7 @@ def setLossType(self, value): """ Sets the value of :py:attr:`lossType`. """ - self.paramMap[self.lossType] = value + self._paramMap[self.lossType] = value return self def getLossType(self): @@ -490,7 +494,7 @@ def setSubsamplingRate(self, value): """ Sets the value of :py:attr:`subsamplingRate`. """ - self.paramMap[self.subsamplingRate] = value + self._paramMap[self.subsamplingRate] = value return self def getSubsamplingRate(self): @@ -503,7 +507,7 @@ def setStepSize(self, value): """ Sets the value of :py:attr:`stepSize`. 
""" - self.paramMap[self.stepSize] = value + self._paramMap[self.stepSize] = value return self def getStepSize(self): diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index ba6478dcd58a9..10fe0ef8db38f 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -31,10 +31,12 @@ import unittest from pyspark.tests import ReusedPySparkTestCase as PySparkTestCase -from pyspark.sql import DataFrame -from pyspark.ml.param import Param +from pyspark.sql import DataFrame, SQLContext +from pyspark.ml.param import Param, Params from pyspark.ml.param.shared import HasMaxIter, HasInputCol -from pyspark.ml.pipeline import Estimator, Model, Pipeline, Transformer +from pyspark.ml import Estimator, Model, Pipeline, Transformer +from pyspark.ml.feature import * +from pyspark.mllib.linalg import DenseVector class MockDataset(DataFrame): @@ -43,44 +45,43 @@ def __init__(self): self.index = 0 -class MockTransformer(Transformer): +class HasFake(Params): + + def __init__(self): + super(HasFake, self).__init__() + self.fake = Param(self, "fake", "fake param") + + def getFake(self): + return self.getOrDefault(self.fake) + + +class MockTransformer(Transformer, HasFake): def __init__(self): super(MockTransformer, self).__init__() - self.fake = Param(self, "fake", "fake") self.dataset_index = None - self.fake_param_value = None - def transform(self, dataset, params={}): + def _transform(self, dataset): self.dataset_index = dataset.index - if self.fake in params: - self.fake_param_value = params[self.fake] dataset.index += 1 return dataset -class MockEstimator(Estimator): +class MockEstimator(Estimator, HasFake): def __init__(self): super(MockEstimator, self).__init__() - self.fake = Param(self, "fake", "fake") self.dataset_index = None - self.fake_param_value = None - self.model = None - def fit(self, dataset, params={}): + def _fit(self, dataset): self.dataset_index = dataset.index - if self.fake in params: - self.fake_param_value = params[self.fake] model = MockModel() - self.model = model + self._copyValues(model) return model -class MockModel(MockTransformer, Model): - - def __init__(self): - super(MockModel, self).__init__() +class MockModel(MockTransformer, Model, HasFake): + pass class PipelineTests(PySparkTestCase): @@ -91,19 +92,17 @@ def test_pipeline(self): transformer1 = MockTransformer() estimator2 = MockEstimator() transformer3 = MockTransformer() - pipeline = Pipeline() \ - .setStages([estimator0, transformer1, estimator2, transformer3]) + pipeline = Pipeline(stages=[estimator0, transformer1, estimator2, transformer3]) pipeline_model = pipeline.fit(dataset, {estimator0.fake: 0, transformer1.fake: 1}) - self.assertEqual(0, estimator0.dataset_index) - self.assertEqual(0, estimator0.fake_param_value) - model0 = estimator0.model + model0, transformer1, model2, transformer3 = pipeline_model.stages self.assertEqual(0, model0.dataset_index) + self.assertEqual(0, model0.getFake()) self.assertEqual(1, transformer1.dataset_index) - self.assertEqual(1, transformer1.fake_param_value) - self.assertEqual(2, estimator2.dataset_index) - model2 = estimator2.model - self.assertIsNone(model2.dataset_index, "The model produced by the last estimator should " - "not be called during fit.") + self.assertEqual(1, transformer1.getFake()) + self.assertEqual(2, dataset.index) + self.assertIsNone(model2.dataset_index, "The last model shouldn't be called in fit.") + self.assertIsNone(transformer3.dataset_index, + "The last transformer shouldn't be called in fit.") dataset = 
pipeline_model.transform(dataset) self.assertEqual(2, model0.dataset_index) self.assertEqual(3, transformer1.dataset_index) @@ -129,7 +128,7 @@ def test_param(self): maxIter = testParams.maxIter self.assertEqual(maxIter.name, "maxIter") self.assertEqual(maxIter.doc, "max number of iterations (>= 0)") - self.assertTrue(maxIter.parent is testParams) + self.assertTrue(maxIter.parent == testParams.uid) def test_params(self): testParams = TestParams() @@ -139,6 +138,7 @@ def test_params(self): params = testParams.params self.assertEqual(params, [inputCol, maxIter]) + self.assertTrue(testParams.hasParam(maxIter)) self.assertTrue(testParams.hasDefault(maxIter)) self.assertFalse(testParams.isSet(maxIter)) self.assertTrue(testParams.isDefined(maxIter)) @@ -147,6 +147,7 @@ def test_params(self): self.assertTrue(testParams.isSet(maxIter)) self.assertEquals(testParams.getMaxIter(), 100) + self.assertTrue(testParams.hasParam(inputCol)) self.assertFalse(testParams.hasDefault(inputCol)) self.assertFalse(testParams.isSet(inputCol)) self.assertFalse(testParams.isDefined(inputCol)) @@ -159,5 +160,45 @@ def test_params(self): "maxIter: max number of iterations (>= 0) (default: 10, current: 100)"])) +class FeatureTests(PySparkTestCase): + + def test_binarizer(self): + b0 = Binarizer() + self.assertListEqual(b0.params, [b0.inputCol, b0.outputCol, b0.threshold]) + self.assertTrue(all([~b0.isSet(p) for p in b0.params])) + self.assertTrue(b0.hasDefault(b0.threshold)) + self.assertEqual(b0.getThreshold(), 0.0) + b0.setParams(inputCol="input", outputCol="output").setThreshold(1.0) + self.assertTrue(all([b0.isSet(p) for p in b0.params])) + self.assertEqual(b0.getThreshold(), 1.0) + self.assertEqual(b0.getInputCol(), "input") + self.assertEqual(b0.getOutputCol(), "output") + + b0c = b0.copy({b0.threshold: 2.0}) + self.assertEqual(b0c.uid, b0.uid) + self.assertListEqual(b0c.params, b0.params) + self.assertEqual(b0c.getThreshold(), 2.0) + + b1 = Binarizer(threshold=2.0, inputCol="input", outputCol="output") + self.assertNotEqual(b1.uid, b0.uid) + self.assertEqual(b1.getThreshold(), 2.0) + self.assertEqual(b1.getInputCol(), "input") + self.assertEqual(b1.getOutputCol(), "output") + + def test_idf(self): + sqlContext = SQLContext(self.sc) + dataset = sqlContext.createDataFrame([ + (DenseVector([1.0, 2.0]),), + (DenseVector([0.0, 1.0]),), + (DenseVector([3.0, 0.2]),)], ["tf"]) + idf0 = IDF(inputCol="tf") + self.assertListEqual(idf0.params, [idf0.inputCol, idf0.minDocFreq, idf0.outputCol]) + idf0m = idf0.fit(dataset, {idf0.outputCol: "idf"}) + self.assertEqual(idf0m.uid, idf0.uid, + "Model should inherit the UID from its parent estimator.") + output = idf0m.transform(dataset) + self.assertIsNotNone(output.head().idf) + + if __name__ == "__main__": unittest.main() diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 86f4dc7368be0..497841b6c8ce6 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -155,7 +155,7 @@ def setEstimator(self, value): """ Sets the value of :py:attr:`estimator`. """ - self.paramMap[self.estimator] = value + self._paramMap[self.estimator] = value return self def getEstimator(self): @@ -168,7 +168,7 @@ def setEstimatorParamMaps(self, value): """ Sets the value of :py:attr:`estimatorParamMaps`. """ - self.paramMap[self.estimatorParamMaps] = value + self._paramMap[self.estimatorParamMaps] = value return self def getEstimatorParamMaps(self): @@ -181,7 +181,7 @@ def setEvaluator(self, value): """ Sets the value of :py:attr:`evaluator`. 
""" - self.paramMap[self.evaluator] = value + self._paramMap[self.evaluator] = value return self def getEvaluator(self): @@ -194,7 +194,7 @@ def setNumFolds(self, value): """ Sets the value of :py:attr:`numFolds`. """ - self.paramMap[self.numFolds] = value + self._paramMap[self.numFolds] = value return self def getNumFolds(self): @@ -203,13 +203,12 @@ def getNumFolds(self): """ return self.getOrDefault(self.numFolds) - def fit(self, dataset, params={}): - paramMap = self.extractParamMap(params) - est = paramMap[self.estimator] - epm = paramMap[self.estimatorParamMaps] + def _fit(self, dataset): + est = self.getOrDefault(self.estimator) + epm = self.getOrDefault(self.estimatorParamMaps) numModels = len(epm) - eva = paramMap[self.evaluator] - nFolds = paramMap[self.numFolds] + eva = self.getOrDefault(self.evaluator) + nFolds = self.getOrDefault(self.numFolds) h = 1.0 / nFolds randCol = self.uid + "_rand" df = dataset.select("*", rand(0).alias(randCol)) @@ -229,6 +228,15 @@ def fit(self, dataset, params={}): bestModel = est.fit(dataset, epm[bestIndex]) return CrossValidatorModel(bestModel) + def copy(self, extra={}): + newCV = Params.copy(self, extra) + if self.isSet(self.estimator): + newCV.setEstimator(self.getEstimator().copy(extra)) + # estimatorParamMaps remain the same + if self.isSet(self.evaluator): + newCV.setEvaluator(self.getEvaluator().copy(extra)) + return newCV + class CrossValidatorModel(Model): """ @@ -240,8 +248,19 @@ def __init__(self, bestModel): #: best model from cross validation self.bestModel = bestModel - def transform(self, dataset, params={}): - return self.bestModel.transform(dataset, params) + def _transform(self, dataset): + return self.bestModel.transform(dataset) + + def copy(self, extra={}): + """ + Creates a copy of this instance with a randomly generated uid + and some extra params. This copies the underlying bestModel, + creates a deep copy of the embedded paramMap, and + copies the embedded and extra parameters over. + :param extra: Extra parameters to copy to the new instance + :return: Copy of this instance + """ + return CrossValidatorModel(self.bestModel.copy(extra)) if __name__ == "__main__": diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index d3cb100a9efa5..cee9d67b05325 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -39,9 +39,16 @@ class Identifiable(object): """ def __init__(self): - #: A unique id for the object. The default implementation - #: concatenates the class name, "_", and 8 random hex chars. - self.uid = type(self).__name__ + "_" + uuid.uuid4().hex[:8] + #: A unique id for the object. + self.uid = self._randomUID() def __repr__(self): return self.uid + + @classmethod + def _randomUID(cls): + """ + Generate a unique id for the object. The default implementation + concatenates the class name, "_", and 12 random hex chars. + """ + return cls.__name__ + "_" + uuid.uuid4().hex[12:] diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index dda6c6aba3049..4419e16184da8 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -45,46 +45,61 @@ class JavaWrapper(Params): __metaclass__ = ABCMeta - #: Fully-qualified class name of the wrapped Java component. - _java_class = None + #: The wrapped Java companion object. Subclasses should initialize + #: it properly. The param values in the Java object should be + #: synced with the Python wrapper in fit/transform/evaluate/copy. 
+ _java_obj = None - def _java_obj(self): + @staticmethod + def _new_java_obj(java_class, *args): """ - Returns or creates a Java object. + Construct a new Java object. """ + sc = SparkContext._active_spark_context java_obj = _jvm() - for name in self._java_class.split("."): + for name in java_class.split("."): java_obj = getattr(java_obj, name) - return java_obj() + java_args = [_py2java(sc, arg) for arg in args] + return java_obj(*java_args) - def _transfer_params_to_java(self, params, java_obj): + def _make_java_param_pair(self, param, value): """ - Transforms the embedded params and additional params to the - input Java object. - :param params: additional params (overwriting embedded values) - :param java_obj: Java object to receive the params + Makes a Java parm pair. + """ + sc = SparkContext._active_spark_context + param = self._resolveParam(param) + java_param = self._java_obj.getParam(param.name) + java_value = _py2java(sc, value) + return java_param.w(java_value) + + def _transfer_params_to_java(self): + """ + Transforms the embedded params to the companion Java object. """ - paramMap = self.extractParamMap(params) + paramMap = self.extractParamMap() for param in self.params: if param in paramMap: - value = paramMap[param] - java_param = java_obj.getParam(param.name) - java_obj.set(java_param.w(value)) + pair = self._make_java_param_pair(param, paramMap[param]) + self._java_obj.set(pair) + + def _transfer_params_from_java(self): + """ + Transforms the embedded params from the companion Java object. + """ + sc = SparkContext._active_spark_context + for param in self.params: + if self._java_obj.hasParam(param.name): + java_param = self._java_obj.getParam(param.name) + value = _java2py(sc, self._java_obj.getOrDefault(java_param)) + self._paramMap[param] = value - def _empty_java_param_map(self): + @staticmethod + def _empty_java_param_map(): """ Returns an empty Java ParamMap reference. """ return _jvm().org.apache.spark.ml.param.ParamMap() - def _create_java_param_map(self, params, java_obj): - paramMap = self._empty_java_param_map() - for param, value in params.items(): - if param.parent is self: - java_param = java_obj.getParam(param.name) - paramMap.put(java_param.w(value)) - return paramMap - @inherit_doc class JavaEstimator(Estimator, JavaWrapper): @@ -99,9 +114,9 @@ def _create_model(self, java_model): """ Creates a model from the input Java model reference. """ - return JavaModel(java_model) + raise NotImplementedError() - def _fit_java(self, dataset, params={}): + def _fit_java(self, dataset): """ Fits a Java model to the input dataset. 
:param dataset: input dataset, which is an instance of @@ -109,12 +124,11 @@ def _fit_java(self, dataset, params={}): :param params: additional params (overwriting embedded values) :return: fitted Java model """ - java_obj = self._java_obj() - self._transfer_params_to_java(params, java_obj) - return java_obj.fit(dataset._jdf, self._empty_java_param_map()) + self._transfer_params_to_java() + return self._java_obj.fit(dataset._jdf) - def fit(self, dataset, params={}): - java_model = self._fit_java(dataset, params) + def _fit(self, dataset): + java_model = self._fit_java(dataset) return self._create_model(java_model) @@ -127,30 +141,47 @@ class JavaTransformer(Transformer, JavaWrapper): __metaclass__ = ABCMeta - def transform(self, dataset, params={}): - java_obj = self._java_obj() - self._transfer_params_to_java(params, java_obj) - return DataFrame(java_obj.transform(dataset._jdf), dataset.sql_ctx) + def _transform(self, dataset): + self._transfer_params_to_java() + return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx) @inherit_doc class JavaModel(Model, JavaTransformer): """ Base class for :py:class:`Model`s that wrap Java/Scala - implementations. + implementations. Subclasses should inherit this class before + param mix-ins, because this sets the UID from the Java model. """ __metaclass__ = ABCMeta def __init__(self, java_model): - super(JavaTransformer, self).__init__() - self._java_model = java_model + """ + Initialize this instance with a Java model object. + Subclasses should call this constructor, initialize params, + and then call _transformer_params_from_java. + """ + super(JavaModel, self).__init__() + self._java_obj = java_model + self.uid = java_model.uid() - def _java_obj(self): - return self._java_model + def copy(self, extra={}): + """ + Creates a copy of this instance with the same uid and some + extra params. This implementation first calls Params.copy and + then make a copy of the companion Java model with extra params. + So both the Python wrapper and the Java model get copied. + :param extra: Extra parameters to copy to the new instance + :return: Copy of this instance + """ + that = super(JavaModel, self).copy(extra) + that._java_obj = self._java_obj.copy(self._empty_java_param_map()) + that._transfer_params_to_java() + return that def _call_java(self, name, *args): - m = getattr(self._java_model, name) + m = getattr(self._java_obj, name) sc = SparkContext._active_spark_context java_args = [_py2java(sc, arg) for arg in args] return _java2py(sc, m(*java_args)) @@ -165,7 +196,11 @@ class JavaEvaluator(Evaluator, JavaWrapper): __metaclass__ = ABCMeta - def evaluate(self, dataset, params={}): - java_obj = self._java_obj() - self._transfer_params_to_java(params, java_obj) - return java_obj.evaluate(dataset._jdf, self._empty_java_param_map()) + def _evaluate(self, dataset): + """ + Evaluates the output. + :param dataset: a dataset that contains labels/observations and predictions. + :return: evaluation metric + """ + self._transfer_params_to_java() + return self._java_obj.evaluate(dataset._jdf) From aa31e431fc09f0477f1c2351c6275769a31aca90 Mon Sep 17 00:00:00 2001 From: Zhan Zhang <zhazhan@gmail.com> Date: Mon, 18 May 2015 12:03:27 -0700 Subject: [PATCH 235/320] [SPARK-2883] [SQL] ORC data source for Spark SQL This PR updates PR #6135 authored by zhzhan from Hortonworks. ---- This PR implements a Spark SQL data source for accessing ORC files. > **NOTE** > > Although ORC is now an Apache TLP, the codebase is still tightly coupled with Hive. 
That's why the new ORC data source is under `org.apache.spark.sql.hive` package, and must be used with `HiveContext`. However, it doesn't require existing Hive installation to access ORC files. 1. Saving/loading ORC files without contacting Hive metastore 1. Support for complex data types (i.e. array, map, and struct) 1. Aware of common optimizations provided by Spark SQL: - Column pruning - Partitioning pruning - Filter push-down 1. Schema evolution support 1. Hive metastore table conversion This PR also include initial work done by scwf from Huawei (PR #3753). Author: Zhan Zhang <zhazhan@gmail.com> Author: Cheng Lian <lian@databricks.com> Closes #6194 from liancheng/polishing-orc and squashes the following commits: 55ecd96 [Cheng Lian] Reorganizes ORC test suites d4afeed [Cheng Lian] Addresses comments 21ada22 [Cheng Lian] Adds @since and @Experimental annotations 128bd3b [Cheng Lian] ORC filter bug fix d734496 [Cheng Lian] Polishes the ORC data source 2650a42 [Zhan Zhang] resolve review comments 3c9038e [Zhan Zhang] resolve review comments 7b3c7c5 [Zhan Zhang] save mode fix f95abfd [Zhan Zhang] reuse test suite 7cc2c64 [Zhan Zhang] predicate fix 4e61c16 [Zhan Zhang] minor change 305418c [Zhan Zhang] orc data source support --- .../scala/org/apache/spark/sql/SQLConf.scala | 7 +- .../spark/sql/parquet/ParquetTest.scala | 61 +--- .../org/apache/spark/sql/sources/ddl.scala | 18 +- .../apache/spark/sql/test/SQLTestUtils.scala | 81 +++++ .../spark/sql/hive/HiveInspectors.scala | 40 ++- .../spark/sql/hive/orc/OrcFileOperator.scala | 69 ++++ .../spark/sql/hive/orc/OrcFilters.scala | 144 +++++++++ .../spark/sql/hive/orc/OrcRelation.scala | 290 +++++++++++++++++ .../hive/orc/OrcHadoopFsRelationSuite.scala | 59 ++++ .../hive/orc/OrcPartitionDiscoverySuite.scala | 256 +++++++++++++++ .../spark/sql/hive/orc/OrcQuerySuite.scala | 294 ++++++++++++++++++ .../spark/sql/hive/orc/OrcSourceSuite.scala | 146 +++++++++ .../apache/spark/sql/hive/orc/OrcTest.scala | 82 +++++ .../sql/sources/hadoopFsRelationSuites.scala | 6 +- 14 files changed, 1477 insertions(+), 76 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala create mode 100644 sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala create mode 100644 sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala create mode 100644 sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index f07bb196c11ec..6da910e332e9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -43,6 +43,8 @@ private[spark] object SQLConf { val PARQUET_FILTER_PUSHDOWN_ENABLED = "spark.sql.parquet.filterPushdown" val PARQUET_USE_DATA_SOURCE_API = "spark.sql.parquet.useDataSourceApi" + val ORC_FILTER_PUSHDOWN_ENABLED = "spark.sql.orc.filterPushdown" + val HIVE_VERIFY_PARTITIONPATH = 
"spark.sql.hive.verifyPartitionPath" val COLUMN_NAME_OF_CORRUPT_RECORD = "spark.sql.columnNameOfCorruptRecord" @@ -143,6 +145,9 @@ private[sql] class SQLConf extends Serializable with CatalystConf { private[spark] def parquetUseDataSourceApi = getConf(PARQUET_USE_DATA_SOURCE_API, "true").toBoolean + private[spark] def orcFilterPushDown = + getConf(ORC_FILTER_PUSHDOWN_ENABLED, "false").toBoolean + /** When true uses verifyPartitionPath to prune the path which is not exists. */ private[spark] def verifyPartitionPath = getConf(HIVE_VERIFY_PARTITIONPATH, "true").toBoolean @@ -254,7 +259,7 @@ private[sql] class SQLConf extends Serializable with CatalystConf { private[spark] def dataFrameRetainGroupColumns: Boolean = getConf(DATAFRAME_RETAIN_GROUP_COLUMNS, "true").toBoolean - + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala index 7a73b6f1ac601..516ba373f41d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala @@ -21,10 +21,9 @@ import java.io.File import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag -import scala.util.Try -import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} -import org.apache.spark.util.Utils +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.{DataFrame, SaveMode} /** * A helper trait that provides convenient facilities for Parquet testing. @@ -33,54 +32,9 @@ import org.apache.spark.util.Utils * convenient to use tuples rather than special case classes when writing test cases/suites. * Especially, `Tuple1.apply` can be used to easily wrap a single type/value. */ -private[sql] trait ParquetTest { - val sqlContext: SQLContext - +private[sql] trait ParquetTest extends SQLTestUtils { import sqlContext.implicits.{localSeqToDataFrameHolder, rddToDataFrameHolder} - import sqlContext.{conf, sparkContext} - - protected def configuration = sparkContext.hadoopConfiguration - - /** - * Sets all SQL configurations specified in `pairs`, calls `f`, and then restore all SQL - * configurations. - * - * @todo Probably this method should be moved to a more general place - */ - protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { - val (keys, values) = pairs.unzip - val currentValues = keys.map(key => Try(conf.getConf(key)).toOption) - (keys, values).zipped.foreach(conf.setConf) - try f finally { - keys.zip(currentValues).foreach { - case (key, Some(value)) => conf.setConf(key, value) - case (key, None) => conf.unsetConf(key) - } - } - } - - /** - * Generates a temporary path without creating the actual file/directory, then pass it to `f`. If - * a file/directory is created there by `f`, it will be delete after `f` returns. - * - * @todo Probably this method should be moved to a more general place - */ - protected def withTempPath(f: File => Unit): Unit = { - val path = Utils.createTempDir() - path.delete() - try f(path) finally Utils.deleteRecursively(path) - } - - /** - * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` - * returns. 
- * - * @todo Probably this method should be moved to a more general place - */ - protected def withTempDir(f: File => Unit): Unit = { - val dir = Utils.createTempDir().getCanonicalFile - try f(dir) finally Utils.deleteRecursively(dir) - } + import sqlContext.sparkContext /** * Writes `data` to a Parquet file, which is then passed to `f` and will be deleted after `f` @@ -105,13 +59,6 @@ private[sql] trait ParquetTest { withParquetFile(data)(path => f(sqlContext.read.parquet(path))) } - /** - * Drops temporary table `tableName` after calling `f`. - */ - protected def withTempTable(tableName: String)(f: => Unit): Unit = { - try f finally sqlContext.dropTempTable(tableName) - } - /** * Writes `data` to a Parquet file, reads it back as a [[DataFrame]] and registers it as a * temporary table named `tableName`, then call `f`. The temporary table together with the diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index 37a569db311ea..a13ab74852ff3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -188,18 +188,20 @@ private[sql] class DDLParser( private[sql] object ResolvedDataSource { private val builtinSources = Map( - "jdbc" -> classOf[org.apache.spark.sql.jdbc.DefaultSource], - "json" -> classOf[org.apache.spark.sql.json.DefaultSource], - "parquet" -> classOf[org.apache.spark.sql.parquet.DefaultSource] + "jdbc" -> "org.apache.spark.sql.jdbc.DefaultSource", + "json" -> "org.apache.spark.sql.json.DefaultSource", + "parquet" -> "org.apache.spark.sql.parquet.DefaultSource", + "orc" -> "org.apache.spark.sql.hive.orc.DefaultSource" ) /** Given a provider name, look up the data source class definition. */ def lookupDataSource(provider: String): Class[_] = { + val loader = Utils.getContextOrSparkClassLoader + if (builtinSources.contains(provider)) { - return builtinSources(provider) + return loader.loadClass(builtinSources(provider)) } - val loader = Utils.getContextOrSparkClassLoader try { loader.loadClass(provider) } catch { @@ -208,7 +210,11 @@ private[sql] object ResolvedDataSource { loader.loadClass(provider + ".DefaultSource") } catch { case cnf: java.lang.ClassNotFoundException => - sys.error(s"Failed to load class for data source: $provider") + if (provider.startsWith("org.apache.spark.sql.hive.orc")) { + sys.error("The ORC data source must be used with Hive support enabled.") + } else { + sys.error(s"Failed to load class for data source: $provider") + } } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala new file mode 100644 index 0000000000000..75d290625ec38 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.test + +import java.io.File + +import scala.util.Try + +import org.apache.spark.sql.SQLContext +import org.apache.spark.util.Utils + +trait SQLTestUtils { + val sqlContext: SQLContext + + import sqlContext.{conf, sparkContext} + + protected def configuration = sparkContext.hadoopConfiguration + + /** + * Sets all SQL configurations specified in `pairs`, calls `f`, and then restore all SQL + * configurations. + * + * @todo Probably this method should be moved to a more general place + */ + protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { + val (keys, values) = pairs.unzip + val currentValues = keys.map(key => Try(conf.getConf(key)).toOption) + (keys, values).zipped.foreach(conf.setConf) + try f finally { + keys.zip(currentValues).foreach { + case (key, Some(value)) => conf.setConf(key, value) + case (key, None) => conf.unsetConf(key) + } + } + } + + /** + * Generates a temporary path without creating the actual file/directory, then pass it to `f`. If + * a file/directory is created there by `f`, it will be delete after `f` returns. + * + * @todo Probably this method should be moved to a more general place + */ + protected def withTempPath(f: File => Unit): Unit = { + val path = Utils.createTempDir() + path.delete() + try f(path) finally Utils.deleteRecursively(path) + } + + /** + * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` + * returns. + * + * @todo Probably this method should be moved to a more general place + */ + protected def withTempDir(f: File => Unit): Unit = { + val dir = Utils.createTempDir().getCanonicalFile + try f(dir) finally Utils.deleteRecursively(dir) + } + + /** + * Drops temporary table `tableName` after calling `f`. 
+ */ + protected def withTempTable(tableName: String)(f: => Unit): Unit = { + try f finally sqlContext.dropTempTable(tableName) + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 7c7666f6e4b7c..0a694c70e4e5c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -18,8 +18,8 @@ package org.apache.spark.sql.hive import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar} -import org.apache.hadoop.hive.serde2.objectinspector._ import org.apache.hadoop.hive.serde2.objectinspector.primitive._ +import org.apache.hadoop.hive.serde2.objectinspector.{StructField => HiveStructField, _} import org.apache.hadoop.hive.serde2.{io => hiveIo} import org.apache.hadoop.{io => hadoopIo} @@ -122,7 +122,7 @@ import scala.collection.JavaConversions._ * even a normal java object (POJO) * UnionObjectInspector: (tag: Int, object data) (TODO: not supported by SparkSQL yet) * - * 3) ConstantObjectInspector: + * 3) ConstantObjectInspector: * Constant object inspector can be either primitive type or Complex type, and it bundles a * constant value as its property, usually the value is created when the constant object inspector * constructed. @@ -133,7 +133,7 @@ import scala.collection.JavaConversions._ } }}} * Hive provides 3 built-in constant object inspectors: - * Primitive Object Inspectors: + * Primitive Object Inspectors: * WritableConstantStringObjectInspector * WritableConstantHiveVarcharObjectInspector * WritableConstantHiveDecimalObjectInspector @@ -147,9 +147,9 @@ import scala.collection.JavaConversions._ * WritableConstantByteObjectInspector * WritableConstantBinaryObjectInspector * WritableConstantDateObjectInspector - * Map Object Inspector: + * Map Object Inspector: * StandardConstantMapObjectInspector - * List Object Inspector: + * List Object Inspector: * StandardConstantListObjectInspector]] * Struct Object Inspector: Hive doesn't provide the built-in constant object inspector for Struct * Union Object Inspector: Hive doesn't provide the built-in constant object inspector for Union @@ -250,9 +250,9 @@ private[hive] trait HiveInspectors { poi.getWritableConstantValue.getHiveDecimal) case poi: WritableConstantTimestampObjectInspector => poi.getWritableConstantValue.getTimestamp.clone() - case poi: WritableConstantIntObjectInspector => + case poi: WritableConstantIntObjectInspector => poi.getWritableConstantValue.get() - case poi: WritableConstantDoubleObjectInspector => + case poi: WritableConstantDoubleObjectInspector => poi.getWritableConstantValue.get() case poi: WritableConstantBooleanObjectInspector => poi.getWritableConstantValue.get() @@ -306,7 +306,7 @@ private[hive] trait HiveInspectors { // In order to keep backward-compatible, we have to copy the // bytes with old apis val bw = x.getPrimitiveWritableObject(data) - val result = new Array[Byte](bw.getLength()) + val result = new Array[Byte](bw.getLength()) System.arraycopy(bw.getBytes(), 0, result, 0, bw.getLength()) result case x: DateObjectInspector if x.preferWritable() => @@ -394,6 +394,30 @@ private[hive] trait HiveInspectors { identity[Any] } + /** + * Builds specific unwrappers ahead of time according to object inspector + * types to avoid pattern matching and branching costs per row. 
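+   * For example, for an `IntObjectInspector` the prebuilt function is roughly
+   * `(value, row, ordinal) => row.setInt(ordinal, oi.get(value))`, so the match on the
+   * inspector type happens once per field instead of once per cell.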
+ */ + def unwrapperFor(field: HiveStructField): (Any, MutableRow, Int) => Unit = + field.getFieldObjectInspector match { + case oi: BooleanObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setBoolean(ordinal, oi.get(value)) + case oi: ByteObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setByte(ordinal, oi.get(value)) + case oi: ShortObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setShort(ordinal, oi.get(value)) + case oi: IntObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setInt(ordinal, oi.get(value)) + case oi: LongObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setLong(ordinal, oi.get(value)) + case oi: FloatObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setFloat(ordinal, oi.get(value)) + case oi: DoubleObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setDouble(ordinal, oi.get(value)) + case oi => + (value: Any, row: MutableRow, ordinal: Int) => row(ordinal) = unwrap(value, oi) + } + /** * Converts native catalyst types to the types expected by Hive * @param a the value to be wrapped diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala new file mode 100644 index 0000000000000..1e51173a19882 --- /dev/null +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.ql.io.orc.{OrcFile, Reader} +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector + +import org.apache.spark.Logging +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql.hive.HiveMetastoreTypes +import org.apache.spark.sql.types.StructType + +private[orc] object OrcFileOperator extends Logging{ + def getFileReader(pathStr: String, config: Option[Configuration] = None ): Reader = { + val conf = config.getOrElse(new Configuration) + val fspath = new Path(pathStr) + val fs = fspath.getFileSystem(conf) + val orcFiles = listOrcFiles(pathStr, conf) + + // TODO Need to consider all files when schema evolution is taken into account. 
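+    // For now only the footer of the first listed file is read, so every file under the
+    // given path is assumed to share the same schema.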
+ OrcFile.createReader(fs, orcFiles.head) + } + + def readSchema(path: String, conf: Option[Configuration]): StructType = { + val reader = getFileReader(path, conf) + val readerInspector = reader.getObjectInspector.asInstanceOf[StructObjectInspector] + val schema = readerInspector.getTypeName + HiveMetastoreTypes.toDataType(schema).asInstanceOf[StructType] + } + + def getObjectInspector(path: String, conf: Option[Configuration]): StructObjectInspector = { + getFileReader(path, conf).getObjectInspector.asInstanceOf[StructObjectInspector] + } + + def listOrcFiles(pathStr: String, conf: Configuration): Seq[Path] = { + val origPath = new Path(pathStr) + val fs = origPath.getFileSystem(conf) + val path = origPath.makeQualified(fs) + val paths = SparkHadoopUtil.get.listLeafStatuses(fs, origPath) + .filterNot(_.isDir) + .map(_.getPath) + .filterNot(_.getName.startsWith("_")) + .filterNot(_.getName.startsWith(".")) + + if (paths == null || paths.size == 0) { + throw new IllegalArgumentException( + s"orcFileOperator: path $path does not have valid orc files matching the pattern") + } + + paths + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala new file mode 100644 index 0000000000000..250e73a4dba92 --- /dev/null +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import org.apache.hadoop.hive.common.`type`.{HiveChar, HiveDecimal, HiveVarchar} +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder +import org.apache.hadoop.hive.serde2.io.DateWritable + +import org.apache.spark.Logging +import org.apache.spark.sql.sources._ + +/** + * It may be optimized by push down partial filters. But we are conservative here. + * Because if some filters fail to be parsed, the tree may be corrupted, + * and cannot be used anymore. + */ +private[orc] object OrcFilters extends Logging { + def createFilter(expr: Array[Filter]): Option[SearchArgument] = { + expr.reduceOption(And).flatMap { conjunction => + val builder = SearchArgument.FACTORY.newBuilder() + buildSearchArgument(conjunction, builder).map(_.build()) + } + } + + private def buildSearchArgument(expression: Filter, builder: Builder): Option[Builder] = { + def newBuilder = SearchArgument.FACTORY.newBuilder() + + def isSearchableLiteral(value: Any) = value match { + // These are types recognized by the `SearchArgumentImpl.BuilderImpl.boxLiteral()` method. 
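+      // Any other literal type makes the enclosing filter inconvertible, so that filter is
+      // simply left out of the pushed-down SearchArgument and evaluated by Spark afterwards.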
+ case _: String | _: Long | _: Double | _: DateWritable | _: HiveDecimal | _: HiveChar | + _: HiveVarchar | _: Byte | _: Short | _: Integer | _: Float => true + case _ => false + } + + // lian: I probably missed something here, and had to end up with a pretty weird double-checking + // pattern when converting `And`/`Or`/`Not` filters. + // + // The annoying part is that, `SearchArgument` builder methods like `startAnd()` `startOr()`, + // and `startNot()` mutate internal state of the builder instance. This forces us to translate + // all convertible filters with a single builder instance. However, before actually converting a + // filter, we've no idea whether it can be recognized by ORC or not. Thus, when an inconvertible + // filter is found, we may already end up with a builder whose internal state is inconsistent. + // + // For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and + // then try to convert its children. Say we convert `left` child successfully, but find that + // `right` child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is + // inconsistent now. + // + // The workaround employed here is that, for `And`/`Or`/`Not`, we first try to convert their + // children with brand new builders, and only do the actual conversion with the right builder + // instance when the children are proven to be convertible. + // + // P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. + // Usage of builder methods mentioned above can only be found in test code, where all tested + // filters are known to be convertible. + + expression match { + case And(left, right) => + val tryLeft = buildSearchArgument(left, newBuilder) + val tryRight = buildSearchArgument(right, newBuilder) + + val conjunction = for { + _ <- tryLeft + _ <- tryRight + lhs <- buildSearchArgument(left, builder.startAnd()) + rhs <- buildSearchArgument(right, lhs) + } yield rhs.end() + + // For filter `left AND right`, we can still push down `left` even if `right` is not + // convertible, and vice versa. 
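+        // For example, given `a < 1 AND b IN (...)` where the `IN` values are of an
+        // unsupported type, only `a < 1` ends up in the SearchArgument.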
+ conjunction + .orElse(tryLeft.flatMap(_ => buildSearchArgument(left, builder))) + .orElse(tryRight.flatMap(_ => buildSearchArgument(right, builder))) + + case Or(left, right) => + for { + _ <- buildSearchArgument(left, newBuilder) + _ <- buildSearchArgument(right, newBuilder) + lhs <- buildSearchArgument(left, builder.startOr()) + rhs <- buildSearchArgument(right, lhs) + } yield rhs.end() + + case Not(child) => + for { + _ <- buildSearchArgument(child, newBuilder) + negate <- buildSearchArgument(child, builder.startNot()) + } yield negate.end() + + case EqualTo(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.equals(attribute, _)) + + case LessThan(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.lessThan(attribute, _)) + + case LessThanOrEqual(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.lessThanEquals(attribute, _)) + + case GreaterThan(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.startNot().lessThanEquals(attribute, _).end()) + + case GreaterThanOrEqual(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.startNot().lessThan(attribute, _).end()) + + case IsNull(attribute) => + Some(builder.isNull(attribute)) + + case IsNotNull(attribute) => + Some(builder.startNot().isNull(attribute).end()) + + case In(attribute, values) => + Option(values) + .filter(_.forall(isSearchableLiteral)) + .map(builder.in(attribute, _)) + + case _ => None + } + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala new file mode 100644 index 0000000000000..9708199f07349 --- /dev/null +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import java.util.{Objects, Properties} + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.hadoop.hive.ql.io.orc.{OrcInputFormat, OrcOutputFormat, OrcSerde, OrcSplit} +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils +import org.apache.hadoop.io.{NullWritable, Writable} +import org.apache.hadoop.mapred.{InputFormat => MapRedInputFormat, JobConf, RecordWriter, Reporter} +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} + +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.mapred.SparkHadoopMapRedUtil +import org.apache.spark.rdd.{HadoopRDD, RDD} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.hive.{HiveContext, HiveInspectors, HiveMetastoreTypes, HiveShim} +import org.apache.spark.sql.sources.{Filter, _} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.{Logging, SerializableWritable} + +/* Implicit conversions */ +import scala.collection.JavaConversions._ + +private[sql] class DefaultSource extends HadoopFsRelationProvider { + def createRelation( + sqlContext: SQLContext, + paths: Array[String], + schema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): HadoopFsRelation = { + assert( + sqlContext.isInstanceOf[HiveContext], + "The ORC data source can only be used with HiveContext.") + + val partitionSpec = partitionColumns.map(PartitionSpec(_, Seq.empty[Partition])) + OrcRelation(paths, parameters, schema, partitionSpec)(sqlContext) + } +} + +private[orc] class OrcOutputWriter( + path: String, + dataSchema: StructType, + context: TaskAttemptContext) + extends OutputWriter with SparkHadoopMapRedUtil with HiveInspectors { + + private val serializer = { + val table = new Properties() + table.setProperty("columns", dataSchema.fieldNames.mkString(",")) + table.setProperty("columns.types", dataSchema.map { f => + HiveMetastoreTypes.toMetastoreType(f.dataType) + }.mkString(":")) + + val serde = new OrcSerde + serde.initialize(context.getConfiguration, table) + serde + } + + // Object inspector converted from the schema of the relation to be written. + private val structOI = { + val typeInfo = + TypeInfoUtils.getTypeInfoFromTypeString( + HiveMetastoreTypes.toMetastoreType(dataSchema)) + + TypeInfoUtils + .getStandardJavaObjectInspectorFromTypeInfo(typeInfo) + .asInstanceOf[StructObjectInspector] + } + + // Used to hold temporary `Writable` fields of the next row to be written. + private val reusableOutputBuffer = new Array[Any](dataSchema.length) + + // Used to convert Catalyst values into Hadoop `Writable`s. + private val wrappers = structOI.getAllStructFieldRefs.map { ref => + wrapperFor(ref.getFieldObjectInspector) + }.toArray + + // `OrcRecordWriter.close()` creates an empty file if no rows are written at all. We use this + // flag to decide whether `OrcRecordWriter.close()` needs to be called. 
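+  // (The record writer itself is created lazily below, on the first call to `write()`.)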
+ private var recordWriterInstantiated = false + + private lazy val recordWriter: RecordWriter[NullWritable, Writable] = { + recordWriterInstantiated = true + + val conf = context.getConfiguration + val partition = context.getTaskAttemptID.getTaskID.getId + val filename = f"part-r-$partition%05d-${System.currentTimeMillis}%015d.orc" + + new OrcOutputFormat().getRecordWriter( + new Path(path, filename).getFileSystem(conf), + conf.asInstanceOf[JobConf], + new Path(path, filename).toUri.getPath, + Reporter.NULL + ).asInstanceOf[RecordWriter[NullWritable, Writable]] + } + + override def write(row: Row): Unit = { + var i = 0 + while (i < row.length) { + reusableOutputBuffer(i) = wrappers(i)(row(i)) + i += 1 + } + + recordWriter.write( + NullWritable.get(), + serializer.serialize(reusableOutputBuffer, structOI)) + } + + override def close(): Unit = { + if (recordWriterInstantiated) { + recordWriter.close(Reporter.NULL) + } + } +} + +@DeveloperApi +private[sql] case class OrcRelation( + override val paths: Array[String], + parameters: Map[String, String], + maybeSchema: Option[StructType] = None, + maybePartitionSpec: Option[PartitionSpec] = None)( + @transient val sqlContext: SQLContext) + extends HadoopFsRelation(maybePartitionSpec) + with Logging { + + override val dataSchema: StructType = maybeSchema.getOrElse { + OrcFileOperator.readSchema( + paths.head, Some(sqlContext.sparkContext.hadoopConfiguration)) + } + + override def userDefinedPartitionColumns: Option[StructType] = + maybePartitionSpec.map(_.partitionColumns) + + override def needConversion: Boolean = false + + override def equals(other: Any): Boolean = other match { + case that: OrcRelation => + paths.toSet == that.paths.toSet && + dataSchema == that.dataSchema && + schema == that.schema && + partitionColumns == that.partitionColumns + case _ => false + } + + override def hashCode(): Int = { + Objects.hashCode( + paths.toSet, + dataSchema, + schema, + maybePartitionSpec) + } + + override def buildScan(requiredColumns: Array[String], + filters: Array[Filter], + inputPaths: Array[String]): RDD[Row] = { + val output = StructType(requiredColumns.map(dataSchema(_))).toAttributes + OrcTableScan(output, this, filters, inputPaths).execute() + } + + override def prepareJobForWrite(job: Job): OutputWriterFactory = { + new OutputWriterFactory { + override def newInstance( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): OutputWriter = { + new OrcOutputWriter(path, dataSchema, context) + } + } + } +} + +private[orc] case class OrcTableScan( + attributes: Seq[Attribute], + @transient relation: OrcRelation, + filters: Array[Filter], + inputPaths: Array[String]) + extends Logging + with HiveInspectors { + + @transient private val sqlContext = relation.sqlContext + + private def addColumnIds( + output: Seq[Attribute], + relation: OrcRelation, + conf: Configuration): Unit = { + val ids = output.map(a => relation.dataSchema.fieldIndex(a.name): Integer) + val (sortedIds, sortedNames) = ids.zip(attributes.map(_.name)).sorted.unzip + HiveShim.appendReadColumns(conf, sortedIds, sortedNames) + } + + // Transform all given raw `Writable`s into `Row`s. 
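+  // Field refs and their unwrappers are resolved once per split from the file's ObjectInspector,
+  // and every record is written into the single reusable `mutableRow` to avoid per-row allocation.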
+ private def fillObject( + path: String, + conf: Configuration, + iterator: Iterator[Writable], + nonPartitionKeyAttrs: Seq[(Attribute, Int)], + mutableRow: MutableRow): Iterator[Row] = { + val deserializer = new OrcSerde + val soi = OrcFileOperator.getObjectInspector(path, Some(conf)) + val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { + case (attr, ordinal) => + soi.getStructFieldRef(attr.name.toLowerCase) -> ordinal + }.unzip + val unwrappers = fieldRefs.map(unwrapperFor) + // Map each tuple to a row object + iterator.map { value => + val raw = deserializer.deserialize(value) + var i = 0 + while (i < fieldRefs.length) { + val fieldValue = soi.getStructFieldData(raw, fieldRefs(i)) + if (fieldValue == null) { + mutableRow.setNullAt(fieldOrdinals(i)) + } else { + unwrappers(i)(fieldValue, mutableRow, fieldOrdinals(i)) + } + i += 1 + } + mutableRow: Row + } + } + + def execute(): RDD[Row] = { + val job = new Job(sqlContext.sparkContext.hadoopConfiguration) + val conf = job.getConfiguration + + // Tries to push down filters if ORC filter push-down is enabled + if (sqlContext.conf.orcFilterPushDown) { + OrcFilters.createFilter(filters).foreach { f => + conf.set(OrcTableScan.SARG_PUSHDOWN, f.toKryo) + conf.setBoolean(ConfVars.HIVEOPTINDEXFILTER.varname, true) + } + } + + // Sets requested columns + addColumnIds(attributes, relation, conf) + + if (inputPaths.nonEmpty) { + FileInputFormat.setInputPaths(job, inputPaths.map(new Path(_)): _*) + } + + val inputFormatClass = + classOf[OrcInputFormat] + .asInstanceOf[Class[_ <: MapRedInputFormat[NullWritable, Writable]]] + + val rdd = sqlContext.sparkContext.hadoopRDD( + conf.asInstanceOf[JobConf], + inputFormatClass, + classOf[NullWritable], + classOf[Writable] + ).asInstanceOf[HadoopRDD[NullWritable, Writable]] + + val wrappedConf = new SerializableWritable(conf) + + rdd.mapPartitionsWithInputSplit { case (split: OrcSplit, iterator) => + val mutableRow = new SpecificMutableRow(attributes.map(_.dataType)) + fillObject( + split.getPath.toString, + wrappedConf.value, + iterator.map(_._2), + attributes.zipWithIndex, + mutableRow) + } + } +} + +private[orc] object OrcTableScan { + // This constant duplicates `OrcInputFormat.SARG_PUSHDOWN`, which is unfortunately not public. + private[orc] val SARG_PUSHDOWN = "sarg.pushdown" +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala new file mode 100644 index 0000000000000..080af5bb23c16 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import org.apache.hadoop.fs.Path + +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql.sources.HadoopFsRelationTest +import org.apache.spark.sql.types._ + +class OrcHadoopFsRelationSuite extends HadoopFsRelationTest { + override val dataSourceName: String = classOf[DefaultSource].getCanonicalName + + import sqlContext._ + import sqlContext.implicits._ + + test("save()/load() - partitioned table - simple queries - partition columns in data") { + withTempDir { file => + val basePath = new Path(file.getCanonicalPath) + val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf) + val qualifiedBasePath = fs.makeQualified(basePath) + + for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) { + val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2") + sparkContext + .parallelize(for (i <- 1 to 3) yield (i, s"val_$i", p1)) + .toDF("a", "b", "p1") + .write + .format("orc") + .save(partitionDir.toString) + } + + val dataSchemaWithPartition = + StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) + + checkQueries( + load( + source = dataSourceName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchemaWithPartition.json))) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala new file mode 100644 index 0000000000000..88c99e35260d9 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import java.io.File +import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive.implicits._ +import org.apache.spark.util.Utils +import org.scalatest.{BeforeAndAfterAll, FunSuiteLike} + +import scala.reflect.ClassTag +import scala.reflect.runtime.universe.TypeTag + + +// The data where the partitioning key exists only in the directory structure. 
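+// On disk such a table looks roughly like this (partition values appear only in directory names):
+//   base/pi=1/ps=foo/part-r-00000-<timestamp>.orc   -- rows contain just (intField, stringField)
+//   base/pi=2/ps=bar/part-r-00000-<timestamp>.orc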
+case class OrcParData(intField: Int, stringField: String) + +// The data that also includes the partitioning key +case class OrcParDataWithKey(intField: Int, pi: Int, stringField: String, ps: String) + +// TODO This test suite duplicates ParquetPartitionDiscoverySuite a lot +class OrcPartitionDiscoverySuite extends QueryTest with FunSuiteLike with BeforeAndAfterAll { + val defaultPartitionName = ConfVars.DEFAULTPARTITIONNAME.defaultVal + + def withTempDir(f: File => Unit): Unit = { + val dir = Utils.createTempDir().getCanonicalFile + try f(dir) finally Utils.deleteRecursively(dir) + } + + def makeOrcFile[T <: Product: ClassTag: TypeTag]( + data: Seq[T], path: File): Unit = { + data.toDF().write.format("orc").mode("overwrite").save(path.getCanonicalPath) + } + + + def makeOrcFile[T <: Product: ClassTag: TypeTag]( + df: DataFrame, path: File): Unit = { + df.write.format("orc").mode("overwrite").save(path.getCanonicalPath) + } + + protected def withTempTable(tableName: String)(f: => Unit): Unit = { + try f finally TestHive.dropTempTable(tableName) + } + + protected def makePartitionDir( + basePath: File, + defaultPartitionName: String, + partitionCols: (String, Any)*): File = { + val partNames = partitionCols.map { case (k, v) => + val valueString = if (v == null || v == "") defaultPartitionName else v.toString + s"$k=$valueString" + } + + val partDir = partNames.foldLeft(basePath) { (parent, child) => + new File(parent, child) + } + + assert(partDir.mkdirs(), s"Couldn't create directory $partDir") + partDir + } + + test("read partitioned table - normal case") { + withTempDir { base => + for { + pi <- Seq(1, 2) + ps <- Seq("foo", "bar") + } { + makeOrcFile( + (1 to 10).map(i => OrcParData(i, i.toString)), + makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) + } + + read.format("orc").load(base.getCanonicalPath).registerTempTable("t") + + withTempTable("t") { + checkAnswer( + sql("SELECT * FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + ps <- Seq("foo", "bar") + } yield Row(i, i.toString, pi, ps)) + + checkAnswer( + sql("SELECT intField, pi FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + _ <- Seq("foo", "bar") + } yield Row(i, pi)) + + checkAnswer( + sql("SELECT * FROM t WHERE pi = 1"), + for { + i <- 1 to 10 + ps <- Seq("foo", "bar") + } yield Row(i, i.toString, 1, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE ps = 'foo'"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + } yield Row(i, i.toString, pi, "foo")) + } + } + } + + test("read partitioned table - partition key included in orc file") { + withTempDir { base => + for { + pi <- Seq(1, 2) + ps <- Seq("foo", "bar") + } { + makeOrcFile( + (1 to 10).map(i => OrcParDataWithKey(i, pi, i.toString, ps)), + makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) + } + + read.format("orc").load(base.getCanonicalPath).registerTempTable("t") + + withTempTable("t") { + checkAnswer( + sql("SELECT * FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + ps <- Seq("foo", "bar") + } yield Row(i, pi, i.toString, ps)) + + checkAnswer( + sql("SELECT intField, pi FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + _ <- Seq("foo", "bar") + } yield Row(i, pi)) + + checkAnswer( + sql("SELECT * FROM t WHERE pi = 1"), + for { + i <- 1 to 10 + ps <- Seq("foo", "bar") + } yield Row(i, 1, i.toString, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE ps = 'foo'"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + } yield Row(i, pi, i.toString, "foo")) + } + } + } + + + test("read partitioned table - with nulls") { + withTempDir { 
base => + for { + // Must be `Integer` rather than `Int` here. `null.asInstanceOf[Int]` results in a zero... + pi <- Seq(1, null.asInstanceOf[Integer]) + ps <- Seq("foo", null.asInstanceOf[String]) + } { + makeOrcFile( + (1 to 10).map(i => OrcParData(i, i.toString)), + makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) + } + + read + .format("orc") + .option(ConfVars.DEFAULTPARTITIONNAME.varname, defaultPartitionName) + .load(base.getCanonicalPath) + .registerTempTable("t") + + withTempTable("t") { + checkAnswer( + sql("SELECT * FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, null.asInstanceOf[Integer]) + ps <- Seq("foo", null.asInstanceOf[String]) + } yield Row(i, i.toString, pi, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE pi IS NULL"), + for { + i <- 1 to 10 + ps <- Seq("foo", null.asInstanceOf[String]) + } yield Row(i, i.toString, null, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE ps IS NULL"), + for { + i <- 1 to 10 + pi <- Seq(1, null.asInstanceOf[Integer]) + } yield Row(i, i.toString, pi, null)) + } + } + } + + test("read partitioned table - with nulls and partition keys are included in Orc file") { + withTempDir { base => + for { + pi <- Seq(1, 2) + ps <- Seq("foo", null.asInstanceOf[String]) + } { + makeOrcFile( + (1 to 10).map(i => OrcParDataWithKey(i, pi, i.toString, ps)), + makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) + } + + read + .format("orc") + .option(ConfVars.DEFAULTPARTITIONNAME.varname, defaultPartitionName) + .load(base.getCanonicalPath) + .registerTempTable("t") + + withTempTable("t") { + checkAnswer( + sql("SELECT * FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + ps <- Seq("foo", null.asInstanceOf[String]) + } yield Row(i, pi, i.toString, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE ps IS NULL"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + } yield Row(i, pi, i.toString, null)) + } + } + } +} + diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala new file mode 100644 index 0000000000000..cdd6e705f4a2c --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import java.io.File + +import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.hadoop.hive.ql.io.orc.CompressionKind +import org.scalatest.{BeforeAndAfterAll, FunSuiteLike} + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive.implicits._ + +case class AllDataTypesWithNonPrimitiveType( + stringField: String, + intField: Int, + longField: Long, + floatField: Float, + doubleField: Double, + shortField: Short, + byteField: Byte, + booleanField: Boolean, + array: Seq[Int], + arrayContainsNull: Seq[Option[Int]], + map: Map[Int, Long], + mapValueContainsNull: Map[Int, Option[Long]], + data: (Seq[Int], (Int, String))) + +case class BinaryData(binaryData: Array[Byte]) + +case class Contact(name: String, phone: String) + +case class Person(name: String, age: Int, contacts: Seq[Contact]) + +class OrcQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterAll with OrcTest { + override val sqlContext = TestHive + + import TestHive.read + + def getTempFilePath(prefix: String, suffix: String = ""): File = { + val tempFile = File.createTempFile(prefix, suffix) + tempFile.delete() + tempFile + } + + test("Read/write All Types") { + val data = (0 to 255).map { i => + (s"$i", i, i.toLong, i.toFloat, i.toDouble, i.toShort, i.toByte, i % 2 == 0) + } + + withOrcFile(data) { file => + checkAnswer( + read.format("orc").load(file), + data.toDF().collect()) + } + } + + test("Read/write binary data") { + withOrcFile(BinaryData("test".getBytes("utf8")) :: Nil) { file => + val bytes = read.format("orc").load(file).head().getAs[Array[Byte]](0) + assert(new String(bytes, "utf8") === "test") + } + } + + test("Read/write all types with non-primitive type") { + val data = (0 to 255).map { i => + AllDataTypesWithNonPrimitiveType( + s"$i", i, i.toLong, i.toFloat, i.toDouble, i.toShort, i.toByte, i % 2 == 0, + 0 until i, + (0 until i).map(Option(_).filter(_ % 3 == 0)), + (0 until i).map(i => i -> i.toLong).toMap, + (0 until i).map(i => i -> Option(i.toLong)).toMap + (i -> None), + (0 until i, (i, s"$i"))) + } + + withOrcFile(data) { file => + checkAnswer( + read.format("orc").load(file), + data.toDF().collect()) + } + } + + test("Creating case class RDD table") { + val data = (1 to 100).map(i => (i, s"val_$i")) + sparkContext.parallelize(data).toDF().registerTempTable("t") + withTempTable("t") { + checkAnswer(sql("SELECT * FROM t"), data.toDF().collect()) + } + } + + test("Simple selection form ORC table") { + val data = (1 to 10).map { i => + Person(s"name_$i", i, (0 to 1).map { m => Contact(s"contact_$m", s"phone_$m") }) + } + + withOrcTable(data, "t") { + // ppd: + // leaf-0 = (LESS_THAN_EQUALS age 5) + // expr = leaf-0 + assert(sql("SELECT name FROM t WHERE age <= 5").count() === 5) + + // ppd: + // leaf-0 = (LESS_THAN_EQUALS age 5) + // expr = (not leaf-0) + assertResult(10) { + sql("SELECT name, contacts FROM t where age > 5") + .flatMap(_.getAs[Seq[_]]("contacts")) + .count() + } + + // ppd: + // leaf-0 = (LESS_THAN_EQUALS age 5) + // leaf-1 = (LESS_THAN age 8) + // expr = (and (not leaf-0) leaf-1) + { + val df = sql("SELECT name, contacts FROM t WHERE age > 5 AND age < 8") + assert(df.count() === 2) + assertResult(4) { + df.flatMap(_.getAs[Seq[_]]("contacts")).count() + } + } + + // ppd: + // leaf-0 = (LESS_THAN age 2) + // leaf-1 = (LESS_THAN_EQUALS age 8) + // expr 
= (or leaf-0 (not leaf-1)) + { + val df = sql("SELECT name, contacts FROM t WHERE age < 2 OR age > 8") + assert(df.count() === 3) + assertResult(6) { + df.flatMap(_.getAs[Seq[_]]("contacts")).count() + } + } + } + } + + test("save and load case class RDD with `None`s as orc") { + val data = ( + None: Option[Int], + None: Option[Long], + None: Option[Float], + None: Option[Double], + None: Option[Boolean] + ) :: Nil + + withOrcFile(data) { file => + checkAnswer( + read.format("orc").load(file), + Row(Seq.fill(5)(null): _*)) + } + } + + // We only support zlib in Hive 0.12.0 now + test("Default compression options for writing to an ORC file") { + withOrcFile((1 to 100).map(i => (i, s"val_$i"))) { file => + assertResult(CompressionKind.ZLIB) { + OrcFileOperator.getFileReader(file).getCompression + } + } + } + + // Following codec is supported in hive-0.13.1, ignore it now + ignore("Other compression options for writing to an ORC file - 0.13.1 and above") { + val data = (1 to 100).map(i => (i, s"val_$i")) + val conf = sparkContext.hadoopConfiguration + + conf.set(ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname, "SNAPPY") + withOrcFile(data) { file => + assertResult(CompressionKind.SNAPPY) { + OrcFileOperator.getFileReader(file).getCompression + } + } + + conf.set(ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname, "NONE") + withOrcFile(data) { file => + assertResult(CompressionKind.NONE) { + OrcFileOperator.getFileReader(file).getCompression + } + } + + conf.set(ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname, "LZO") + withOrcFile(data) { file => + assertResult(CompressionKind.LZO) { + OrcFileOperator.getFileReader(file).getCompression + } + } + } + + test("simple select queries") { + withOrcTable((0 until 10).map(i => (i, i.toString)), "t") { + checkAnswer( + sql("SELECT `_1` FROM t where t.`_1` > 5"), + (6 until 10).map(Row.apply(_))) + + checkAnswer( + sql("SELECT `_1` FROM t as tmp where tmp.`_1` < 5"), + (0 until 5).map(Row.apply(_))) + } + } + + test("appending") { + val data = (0 until 10).map(i => (i, i.toString)) + createDataFrame(data).toDF("c1", "c2").registerTempTable("tmp") + withOrcTable(data, "t") { + sql("INSERT INTO TABLE t SELECT * FROM tmp") + checkAnswer(table("t"), (data ++ data).map(Row.fromTuple)) + } + catalog.unregisterTable(Seq("tmp")) + } + + test("overwriting") { + val data = (0 until 10).map(i => (i, i.toString)) + createDataFrame(data).toDF("c1", "c2").registerTempTable("tmp") + withOrcTable(data, "t") { + sql("INSERT OVERWRITE TABLE t SELECT * FROM tmp") + checkAnswer(table("t"), data.map(Row.fromTuple)) + } + catalog.unregisterTable(Seq("tmp")) + } + + test("self-join") { + // 4 rows, cells of column 1 of row 2 and row 4 are null + val data = (1 to 4).map { i => + val maybeInt = if (i % 2 == 0) None else Some(i) + (maybeInt, i.toString) + } + + withOrcTable(data, "t") { + val selfJoin = sql("SELECT * FROM t x JOIN t y WHERE x.`_1` = y.`_1`") + val queryOutput = selfJoin.queryExecution.analyzed.output + + assertResult(4, "Field count mismatches")(queryOutput.size) + assertResult(2, "Duplicated expression ID in query plan:\n $selfJoin") { + queryOutput.filter(_.name == "_1").map(_.exprId).size + } + + checkAnswer(selfJoin, List(Row(1, "1", 1, "1"), Row(3, "3", 3, "3"))) + } + } + + test("nested data - struct with array field") { + val data = (1 to 10).map(i => Tuple1((i, Seq("val_$i")))) + withOrcTable(data, "t") { + checkAnswer(sql("SELECT `_1`.`_2`[0] FROM t"), data.map { + case Tuple1((_, Seq(string))) => Row(string) + }) + } + } + + test("nested data - array of struct") { + val 
data = (1 to 10).map(i => Tuple1(Seq(i -> "val_$i"))) + withOrcTable(data, "t") { + checkAnswer(sql("SELECT `_1`[0].`_2` FROM t"), data.map { + case Tuple1(Seq((_, string))) => Row(string) + }) + } + } + + test("columns only referenced by pushed down filters should remain") { + withOrcTable((1 to 10).map(Tuple1.apply), "t") { + checkAnswer(sql("SELECT `_1` FROM t WHERE `_1` < 10"), (1 to 9).map(Row.apply(_))) + } + } + + test("SPARK-5309 strings stored using dictionary compression in orc") { + withOrcTable((0 until 1000).map(i => ("same", "run_" + i / 100, 1)), "t") { + checkAnswer( + sql("SELECT `_1`, `_2`, SUM(`_3`) FROM t GROUP BY `_1`, `_2`"), + (0 until 10).map(i => Row("same", "run_" + i, 100))) + + checkAnswer( + sql("SELECT `_1`, `_2`, SUM(`_3`) FROM t WHERE `_2` = 'run_5' GROUP BY `_1`, `_2`"), + List(Row("same", "run_5", 100))) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala new file mode 100644 index 0000000000000..82e08caf46457 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import java.io.File + +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.{QueryTest, Row} + +case class OrcData(intField: Int, stringField: String) + +abstract class OrcSuite extends QueryTest with BeforeAndAfterAll { + var orcTableDir: File = null + var orcTableAsDir: File = null + + override def beforeAll(): Unit = { + super.beforeAll() + + orcTableAsDir = File.createTempFile("orctests", "sparksql") + orcTableAsDir.delete() + orcTableAsDir.mkdir() + + // Hack: to prepare orc data files using hive external tables + orcTableDir = File.createTempFile("orctests", "sparksql") + orcTableDir.delete() + orcTableDir.mkdir() + import org.apache.spark.sql.hive.test.TestHive.implicits._ + + sparkContext + .makeRDD(1 to 10) + .map(i => OrcData(i, s"part-$i")) + .toDF() + .registerTempTable(s"orc_temp_table") + + sql( + s"""CREATE EXTERNAL TABLE normal_orc( + | intField INT, + | stringField STRING + |) + |STORED AS ORC + |LOCATION '${orcTableAsDir.getCanonicalPath}' + """.stripMargin) + + sql( + s"""INSERT INTO TABLE normal_orc + |SELECT intField, stringField FROM orc_temp_table + """.stripMargin) + } + + override def afterAll(): Unit = { + orcTableDir.delete() + orcTableAsDir.delete() + } + + test("create temporary orc table") { + checkAnswer(sql("SELECT COUNT(*) FROM normal_orc_source"), Row(10)) + + checkAnswer( + sql("SELECT * FROM normal_orc_source"), + (1 to 10).map(i => Row(i, s"part-$i"))) + + checkAnswer( + sql("SELECT * FROM normal_orc_source where intField > 5"), + (6 to 10).map(i => Row(i, s"part-$i"))) + + checkAnswer( + sql("SELECT COUNT(intField), stringField FROM normal_orc_source GROUP BY stringField"), + (1 to 10).map(i => Row(1, s"part-$i"))) + } + + test("create temporary orc table as") { + checkAnswer(sql("SELECT COUNT(*) FROM normal_orc_as_source"), Row(10)) + + checkAnswer( + sql("SELECT * FROM normal_orc_source"), + (1 to 10).map(i => Row(i, s"part-$i"))) + + checkAnswer( + sql("SELECT * FROM normal_orc_source WHERE intField > 5"), + (6 to 10).map(i => Row(i, s"part-$i"))) + + checkAnswer( + sql("SELECT COUNT(intField), stringField FROM normal_orc_source GROUP BY stringField"), + (1 to 10).map(i => Row(1, s"part-$i"))) + } + + test("appending insert") { + sql("INSERT INTO TABLE normal_orc_source SELECT * FROM orc_temp_table WHERE intField > 5") + + checkAnswer( + sql("SELECT * FROM normal_orc_source"), + (1 to 5).map(i => Row(i, s"part-$i")) ++ (6 to 10).flatMap { i => + Seq.fill(2)(Row(i, s"part-$i")) + }) + } + + test("overwrite insert") { + sql( + """INSERT OVERWRITE TABLE normal_orc_as_source + |SELECT * FROM orc_temp_table WHERE intField > 5 + """.stripMargin) + + checkAnswer( + sql("SELECT * FROM normal_orc_as_source"), + (6 to 10).map(i => Row(i, s"part-$i"))) + } +} + +class OrcSourceSuite extends OrcSuite { + override def beforeAll(): Unit = { + super.beforeAll() + + sql( + s"""CREATE TEMPORARY TABLE normal_orc_source + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).getCanonicalPath}' + |) + """.stripMargin) + + sql( + s"""CREATE TEMPORARY TABLE normal_orc_as_source + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).getCanonicalPath}' + |) + """.stripMargin) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala new file mode 100644 index 
0000000000000..750f0b04aaa87 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import java.io.File + +import scala.reflect.ClassTag +import scala.reflect.runtime.universe.TypeTag + +import org.apache.spark.sql.hive.HiveContext +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql._ + +private[sql] trait OrcTest extends SQLTestUtils { + protected def hiveContext = sqlContext.asInstanceOf[HiveContext] + + import sqlContext.sparkContext + import sqlContext.implicits._ + + /** + * Writes `data` to a Orc file, which is then passed to `f` and will be deleted after `f` + * returns. + */ + protected def withOrcFile[T <: Product: ClassTag: TypeTag] + (data: Seq[T]) + (f: String => Unit): Unit = { + withTempPath { file => + sparkContext.parallelize(data).toDF().write.format("orc").save(file.getCanonicalPath) + f(file.getCanonicalPath) + } + } + + /** + * Writes `data` to a Orc file and reads it back as a [[DataFrame]], + * which is then passed to `f`. The Orc file will be deleted after `f` returns. + */ + protected def withOrcDataFrame[T <: Product: ClassTag: TypeTag] + (data: Seq[T]) + (f: DataFrame => Unit): Unit = { + withOrcFile(data)(path => f(hiveContext.read.format("orc").load(path))) + } + + /** + * Writes `data` to a Orc file, reads it back as a [[DataFrame]] and registers it as a + * temporary table named `tableName`, then call `f`. The temporary table together with the + * Orc file will be dropped/deleted after `f` returns. 
+ */ + protected def withOrcTable[T <: Product: ClassTag: TypeTag] + (data: Seq[T], tableName: String) + (f: => Unit): Unit = { + withOrcDataFrame(data) { df => + hiveContext.registerDataFrameAsTable(df, tableName) + withTempTable(tableName)(f) + } + } + + protected def makeOrcFile[T <: Product: ClassTag: TypeTag]( + data: Seq[T], path: File): Unit = { + data.toDF().write.format("orc").mode(SaveMode.Overwrite).save(path.getCanonicalPath) + } + + protected def makeOrcFile[T <: Product: ClassTag: TypeTag]( + df: DataFrame, path: File): Unit = { + df.write.format("orc").mode(SaveMode.Overwrite).save(path.getCanonicalPath) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala index 9d9b436cabe3c..ad4a4826c6b45 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala @@ -23,12 +23,10 @@ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.parquet.ParquetTest +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ -// TODO Don't extend ParquetTest -// This test suite extends ParquetTest for some convenient utility methods. These methods should be -// moved to some more general places, maybe QueryTest. -class HadoopFsRelationTest extends QueryTest with ParquetTest { +abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils { override val sqlContext: SQLContext = TestHive import sqlContext._ From fc2480ed13742a99470b5012ca3a75ab91e5a5e5 Mon Sep 17 00:00:00 2001 From: scwf <wangfei1@huawei.com> Date: Mon, 18 May 2015 12:05:14 -0700 Subject: [PATCH 236/320] [SPARK-7631] [SQL] treenode argString should not print children spark-sql> > explain extended > select * from ( > select key from src union all > select key from src) t; now the spark plan will print children in argString ``` == Physical Plan == Union[ HiveTableScan key#1, (MetastoreRelation default, src, None), None, HiveTableScan key#3, (MetastoreRelation default, src, None), None] HiveTableScan key#1, (MetastoreRelation default, src, None), None HiveTableScan key#3, (MetastoreRelation default, src, None), None ``` after this patch: ``` == Physical Plan == Union HiveTableScan [key#1], (MetastoreRelation default, src, None), None HiveTableScan [key#3], (MetastoreRelation default, src, None), None ``` I have tested this locally Author: scwf <wangfei1@huawei.com> Closes #6144 from scwf/fix-argString and squashes the following commits: 1a642e0 [scwf] fix treenode argString --- .../scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index bc2ad34523d2c..28e15566f0961 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -385,6 +385,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { def argString: String = productIterator.flatMap { case tn: TreeNode[_] if children contains tn => Nil case tn: TreeNode[_] if tn.toString contains "\n" => s"(${tn.simpleString})" :: Nil + case seq: Seq[BaseType] if 
seq.toSet.subsetOf(children.toSet) => Nil case seq: Seq[_] => seq.mkString("[", ",", "]") :: Nil case set: Set[_] => set.mkString("{", ",", "}") :: Nil case other => other :: Nil From 103c863c2ef3d9e6186cfc7d95251a9515e9f180 Mon Sep 17 00:00:00 2001 From: Wenchen Fan <cloud0fan@outlook.com> Date: Mon, 18 May 2015 12:08:28 -0700 Subject: [PATCH 237/320] [SPARK-7269] [SQL] Incorrect analysis for aggregation(use semanticEquals) A modified version of https://github.com/apache/spark/pull/6110, use `semanticEquals` to make it more efficient. Author: Wenchen Fan <cloud0fan@outlook.com> Closes #6173 from cloud-fan/7269 and squashes the following commits: e4a3cc7 [Wenchen Fan] address comments cc02045 [Wenchen Fan] consider elements length equal d7ff8f4 [Wenchen Fan] fix 7269 --- .../sql/catalyst/analysis/Analyzer.scala | 29 +++++-------------- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 +-- .../sql/catalyst/expressions/Expression.scala | 13 +++++++++ .../expressions/namedExpressions.scala | 5 ++++ .../sql/catalyst/planning/patterns.scala | 5 ++-- .../sql/hive/execution/SQLQuerySuite.scala | 18 ++++++++++++ 6 files changed, 48 insertions(+), 26 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0b6e1d44b9c4d..dfa4215f2efe5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -25,7 +25,6 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.types._ -import org.apache.spark.util.collection.OpenHashSet /** * A trivial [[Analyzer]] with an [[EmptyCatalog]] and [[EmptyFunctionRegistry]]. 
Used for testing @@ -142,25 +141,6 @@ class Analyzer( } object ResolveGroupingAnalytics extends Rule[LogicalPlan] { - /** - * Extract attribute set according to the grouping id - * @param bitmask bitmask to represent the selected of the attribute sequence - * @param exprs the attributes in sequence - * @return the attributes of non selected specified via bitmask (with the bit set to 1) - */ - private def buildNonSelectExprSet(bitmask: Int, exprs: Seq[Expression]) - : OpenHashSet[Expression] = { - val set = new OpenHashSet[Expression](2) - - var bit = exprs.length - 1 - while (bit >= 0) { - if (((bitmask >> bit) & 1) == 0) set.add(exprs(bit)) - bit -= 1 - } - - set - } - /* * GROUP BY a, b, c WITH ROLLUP * is equivalent to @@ -197,10 +177,15 @@ class Analyzer( g.bitmasks.foreach { bitmask => // get the non selected grouping attributes according to the bit mask - val nonSelectedGroupExprSet = buildNonSelectExprSet(bitmask, g.groupByExprs) + val nonSelectedGroupExprs = ArrayBuffer.empty[Expression] + var bit = g.groupByExprs.length - 1 + while (bit >= 0) { + if (((bitmask >> bit) & 1) == 0) nonSelectedGroupExprs += g.groupByExprs(bit) + bit -= 1 + } val substitution = (g.child.output :+ g.gid).map(expr => expr transformDown { - case x: Expression if nonSelectedGroupExprSet.contains(x) => + case x: Expression if nonSelectedGroupExprs.find(_ semanticEquals x).isDefined => // if the input attribute in the Invalid Grouping Expression set of for this group // replace it with constant null Literal.create(null, expr.dataType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index f104e742c90fe..06a0504359f6e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -86,12 +86,12 @@ trait CheckAnalysis { case Aggregate(groupingExprs, aggregateExprs, child) => def checkValidAggregateExpression(expr: Expression): Unit = expr match { case _: AggregateExpression => // OK - case e: Attribute if !groupingExprs.contains(e) => + case e: Attribute if groupingExprs.find(_ semanticEquals e).isEmpty => failAnalysis( s"expression '${e.prettyString}' is neither present in the group by, " + s"nor is it an aggregate function. " + "Add to group by or wrap in first() if you don't care which value you get.") - case e if groupingExprs.contains(e) => // OK + case e if groupingExprs.find(_ semanticEquals e).isDefined => // OK case e if e.references.isEmpty => // OK case e => e.children.foreach(checkValidAggregateExpression) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 0837a3179d897..c7ae9da7fce49 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -76,6 +76,19 @@ abstract class Expression extends TreeNode[Expression] { case u: UnresolvedAttribute => PrettyAttribute(u.name) }.toString } + + /** + * Returns true when two expressions will always compute the same result, even if they differ + * cosmetically (i.e. capitalization of names in attributes may be different). 
+ */ + def semanticEquals(other: Expression): Boolean = this.getClass == other.getClass && { + val elements1 = this.productIterator.toSeq + val elements2 = other.asInstanceOf[Product].productIterator.toSeq + elements1.length == elements2.length && elements1.zip(elements2).forall { + case (e1: Expression, e2: Expression) => e1 semanticEquals e2 + case (i1, i2) => i1 == i2 + } + } } abstract class BinaryExpression extends Expression with trees.BinaryNode[Expression] { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index a9170589f8c6c..50be26d0b08b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -181,6 +181,11 @@ case class AttributeReference( case _ => false } + override def semanticEquals(other: Expression): Boolean = other match { + case ar: AttributeReference => sameRef(ar) + case _ => false + } + override def hashCode: Int = { // See http://stackoverflow.com/questions/113511/hash-code-implementation var h = 17 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index cd54d04814ea4..1dd75a8846303 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -159,9 +159,10 @@ object PartialAggregation { // Should trim aliases around `GetField`s. These aliases are introduced while // resolving struct field accesses, because `GetField` is not a `NamedExpression`. // (Should we just turn `GetField` into a `NamedExpression`?) 
+ val trimmed = e.transform { case Alias(g: ExtractValue, _) => g } namedGroupingExpressions - .get(e.transform { case Alias(g: ExtractValue, _) => g }) - .map(_.toAttribute) + .find { case (k, v) => k semanticEquals trimmed } + .map(_._2.toAttribute) .getOrElse(e) }).asInstanceOf[Seq[NamedExpression]] diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index ca2c4b4019c55..e60d00e63574d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -773,4 +773,22 @@ class SQLQuerySuite extends QueryTest { | select * from v2 order by key limit 1 """.stripMargin), Row(0, 3)) } + + test("SPARK-7269 Check analysis failed in case in-sensitive") { + Seq(1, 2, 3).map { i => + (i.toString, i.toString) + }.toDF("key", "value").registerTempTable("df_analysis") + sql("SELECT kEy from df_analysis group by key").collect() + sql("SELECT kEy+3 from df_analysis group by key+3").collect() + sql("SELECT kEy+3, a.kEy, A.kEy from df_analysis A group by key").collect() + sql("SELECT cast(kEy+1 as Int) from df_analysis A group by cast(key+1 as int)").collect() + sql("SELECT cast(kEy+1 as Int) from df_analysis A group by key+1").collect() + sql("SELECT 2 from df_analysis A group by key+1").collect() + intercept[AnalysisException] { + sql("SELECT kEy+1 from df_analysis group by key+3") + } + intercept[AnalysisException] { + sql("SELECT cast(key+2 as Int) from df_analysis A group by cast(key+1 as int)") + } + } } From 530397ba2f5c0fcabb86ba73048c95177ed0b9fc Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Mon, 18 May 2015 12:17:10 -0700 Subject: [PATCH 238/320] [SPARK-7567] [SQL] [follow-up] Use a new flag to set output committer based on mapreduce apis cc liancheng marmbrus Author: Yin Huai <yhuai@databricks.com> Closes #6130 from yhuai/directOutput and squashes the following commits: 312b07d [Yin Huai] A data source can use spark.sql.sources.outputCommitterClass to override the output committer. --- .../scala/org/apache/spark/sql/SQLConf.scala | 4 +++ .../apache/spark/sql/parquet/newParquet.scala | 2 +- .../apache/spark/sql/sources/commands.scala | 29 ++++++++++++++----- .../apache/spark/sql/sources/interfaces.scala | 3 +- 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index 6da910e332e9b..77c6af27d1007 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -71,6 +71,10 @@ private[spark] object SQLConf { // Whether to perform partition discovery when loading external data sources. Default to true. val PARTITION_DISCOVERY_ENABLED = "spark.sql.sources.partitionDiscovery.enabled" + // The output committer class used by FSBasedRelation. The specified class needs to be a + // subclass of org.apache.hadoop.mapreduce.OutputCommitter. + val OUTPUT_COMMITTER_CLASS = "spark.sql.sources.outputCommitterClass" + // Whether to perform eager analysis when constructing a dataframe. // Set to false when debugging requires the ability to look at invalid query plans. 
val DATAFRAME_EAGER_ANALYSIS = "spark.sql.eagerAnalysis" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index bcbdb1ebd236a..fea54a251461d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -197,7 +197,7 @@ private[sql] class ParquetRelation2( classOf[ParquetOutputCommitter]) conf.setClass( - "mapred.output.committer.class", + SQLConf.OUTPUT_COMMITTER_CLASS, committerClass, classOf[ParquetOutputCommitter]) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index a09bb08de736a..d54dbb0831444 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -23,7 +23,7 @@ import scala.collection.mutable import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce._ -import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter, FileOutputFormat} +import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter => MapReduceFileOutputCommitter, FileOutputFormat} import org.apache.hadoop.util.Shell import parquet.hadoop.util.ContextUtil @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateProjection import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.RunnableCommand -import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} +import org.apache.spark.sql.{SQLConf, DataFrame, SQLContext, SaveMode} private[sql] case class InsertIntoDataSource( logicalRelation: LogicalRelation, @@ -287,24 +287,39 @@ private[sql] abstract class BaseWriterContainer( protected def getWorkPath: String = { outputCommitter match { // FileOutputCommitter writes to a temporary location returned by `getWorkPath`. - case f: FileOutputCommitter => f.getWorkPath.toString + case f: MapReduceFileOutputCommitter => f.getWorkPath.toString case _ => outputPath } } private def newOutputCommitter(context: TaskAttemptContext): OutputCommitter = { val committerClass = context.getConfiguration.getClass( - "mapred.output.committer.class", null, classOf[OutputCommitter]) + SQLConf.OUTPUT_COMMITTER_CLASS, null, classOf[OutputCommitter]) Option(committerClass).map { clazz => - val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext]) - ctor.newInstance(new Path(outputPath), context) + // Every output format based on org.apache.hadoop.mapreduce.lib.output.OutputFormat + // has an associated output committer. To override this output committer, + // we will first try to use the output committer set in SQLConf.OUTPUT_COMMITTER_CLASS. + // If a data source needs to override the output committer, it needs to set the + // output committer in prepareForWrite method. + if (classOf[MapReduceFileOutputCommitter].isAssignableFrom(clazz)) { + // The specified output committer is a FileOutputCommitter. + // So, we will use the FileOutputCommitter-specified constructor. + val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext]) + ctor.newInstance(new Path(outputPath), context) + } else { + // The specified output committer is just a OutputCommitter. + // So, we will use the no-argument constructor. 
+ val ctor = clazz.getDeclaredConstructor() + ctor.newInstance() + } }.getOrElse { + // If output committer class is not set, we will use the one associated with the + // file output format. outputFormatClass.newInstance().getOutputCommitter(context) } } - private def setupIDs(jobId: Int, splitId: Int, attemptId: Int): Unit = { this.jobId = SparkHadoopWriter.createJobID(new Date, jobId) this.taskId = new TaskID(this.jobId, true, splitId) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 274ab4485217a..a82a6758d2537 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -527,7 +527,8 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio /** * Prepares a write job and returns an [[OutputWriterFactory]]. Client side job preparation can - * be put here. For example, user defined output committer can be configured here. + * be put here. For example, user defined output committer can be configured here + * by setting the output committer class in the conf of spark.sql.sources.outputCommitterClass. * * Note that the only side effect expected here is mutating `job` via its setters. Especially, * Spark SQL caches [[BaseRelation]] instances for performance, mutating relation internal states From 9dadf019b93038e1e18336ccd06c5eecb4bae32f Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Mon, 18 May 2015 12:45:37 -0700 Subject: [PATCH 239/320] [SPARK-7673] [SQL] WIP: HadoopFsRelation and ParquetRelation2 performance optimizations This PR introduces several performance optimizations to `HadoopFsRelation` and `ParquetRelation2`: 1. Moving `FileStatus` listing from `DataSourceStrategy` into a cache within `HadoopFsRelation`. This new cache generalizes and replaces the one used in `ParquetRelation2`. This also introduces an interface change: to reuse cached `FileStatus` objects, `HadoopFsRelation.buildScan` methods now receive `Array[FileStatus]` instead of `Array[String]`. 1. When Parquet task side metadata reading is enabled, skip reading row group information when reading Parquet footers. This is basically what PR #5334 does. Also, now we uses `ParquetFileReader.readAllFootersInParallel` to read footers in parallel. Another optimization in question is, instead of asking `HadoopFsRelation.buildScan` to return an `RDD[Row]` for a single selected partition and then union them all, we ask it to return an `RDD[Row]` for all selected partitions. This optimization is based on the fact that Hadoop configuration broadcasting used in `NewHadoopRDD` takes 34% time in the following microbenchmark. However, this complicates data source user code because user code must merge partition values manually. To check the cost of broadcasting in `NewHadoopRDD`, I also did microbenchmark after removing the `broadcast` call in `NewHadoopRDD`. All results are shown below. ### Microbenchmark #### Preparation code Generating a partitioned table with 50k partitions, 1k rows per partition: ```scala import sqlContext._ import sqlContext.implicits._ for (n <- 0 until 500) { val data = for { p <- (n * 10) until ((n + 1) * 10) i <- 0 until 1000 } yield (i, f"val_$i%04d", f"$p%04d") data. toDF("a", "b", "p"). write. partitionBy("p"). mode("append"). 
parquet(path) } ``` #### Benchmarking code ```scala import sqlContext._ import sqlContext.implicits._ import org.apache.spark.sql.types._ import com.google.common.base.Stopwatch val path = "hdfs://localhost:9000/user/lian/5k" def benchmark(n: Int)(f: => Unit) { val stopwatch = new Stopwatch() def run() = { stopwatch.reset() stopwatch.start() f stopwatch.stop() stopwatch.elapsedMillis() } val records = (0 until n).map(_ => run()) (0 until n).foreach(i => println(s"Round $i: ${records(i)} ms")) println(s"Average: ${records.sum / n.toDouble} ms") } benchmark(3) { read.parquet(path).explain(extended = true) } ``` #### Results Before: ``` Round 0: 72528 ms Round 1: 68938 ms Round 2: 65372 ms Average: 68946.0 ms ``` After: ``` Round 0: 59499 ms Round 1: 53645 ms Round 2: 53844 ms Round 3: 49093 ms Round 4: 50555 ms Average: 53327.2 ms ``` Also removing Hadoop configuration broadcasting: (Note that I was testing on a local laptop, thus network cost is pretty low.) ``` Round 0: 15806 ms Round 1: 14394 ms Round 2: 14699 ms Round 3: 15334 ms Round 4: 14123 ms Average: 14871.2 ms ``` Author: Cheng Lian <lian@databricks.com> Closes #6225 from liancheng/spark-7673 and squashes the following commits: 2d58a2b [Cheng Lian] Skips reading row group information when using task side metadata reading 7aa3748 [Cheng Lian] Optimizes FileStatusCache by introducing a map from parent directories to child files ba41250 [Cheng Lian] Reuses HadoopFsRelation FileStatusCache in ParquetRelation2 3d278f7 [Cheng Lian] Fixes a bug when reading a single Parquet data file b84612a [Cheng Lian] Fixes Scala style issue 6a08b02 [Cheng Lian] WIP: Moves file status cache into HadoopFSRelation --- .../apache/spark/sql/parquet/newParquet.scala | 61 +++++----- .../sql/sources/DataSourceStrategy.scala | 37 ++----- .../apache/spark/sql/sources/interfaces.scala | 104 ++++++++++++++---- .../sql/sources/SimpleTextRelation.scala | 6 +- 4 files changed, 117 insertions(+), 91 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index fea54a251461d..7ca44f7b81a2d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -23,12 +23,11 @@ import scala.collection.JavaConversions._ import scala.util.Try import com.google.common.base.Objects -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import parquet.filter2.predicate.FilterApi -import parquet.format.converter.ParquetMetadataConverter import parquet.hadoop._ import parquet.hadoop.metadata.CompressionCodecName import parquet.hadoop.util.ContextUtil @@ -175,8 +174,8 @@ private[sql] class ParquetRelation2( override def dataSchema: StructType = metadataCache.dataSchema override private[sql] def refresh(): Unit = { - metadataCache.refresh() super.refresh() + metadataCache.refresh() } // Parquet data source always uses Catalyst internal representations. 
@@ -234,15 +233,15 @@ private[sql] class ParquetRelation2( override def buildScan( requiredColumns: Array[String], filters: Array[Filter], - inputPaths: Array[String]): RDD[Row] = { + inputFiles: Array[FileStatus]): RDD[Row] = { val job = new Job(SparkHadoopUtil.get.conf) val conf = ContextUtil.getConfiguration(job) ParquetInputFormat.setReadSupportClass(job, classOf[RowReadSupport]) - if (inputPaths.nonEmpty) { - FileInputFormat.setInputPaths(job, inputPaths.map(new Path(_)): _*) + if (inputFiles.nonEmpty) { + FileInputFormat.setInputPaths(job, inputFiles.map(_.getPath): _*) } // Try to push down filters when filter push-down is enabled. @@ -269,10 +268,7 @@ private[sql] class ParquetRelation2( val useMetadataCache = sqlContext.getConf(SQLConf.PARQUET_CACHE_METADATA, "true").toBoolean conf.set(SQLConf.PARQUET_CACHE_METADATA, useMetadataCache.toString) - val inputFileStatuses = - metadataCache.dataStatuses.filter(f => inputPaths.contains(f.getPath.toString)) - - val footers = inputFileStatuses.map(metadataCache.footers) + val footers = inputFiles.map(f => metadataCache.footers(f.getPath)) // TODO Stop using `FilteringParquetRowInputFormat` and overriding `getPartition`. // After upgrading to Parquet 1.6.0, we should be able to stop caching `FileStatus` objects and @@ -287,7 +283,7 @@ private[sql] class ParquetRelation2( val cacheMetadata = useMetadataCache - @transient val cachedStatuses = inputFileStatuses.map { f => + @transient val cachedStatuses = inputFiles.map { f => // In order to encode the authority of a Path containing special characters such as /, // we need to use the string returned by the URI of the path to create a new Path. val pathWithAuthority = new Path(f.getPath.toUri.toString) @@ -333,7 +329,7 @@ private[sql] class ParquetRelation2( private var commonMetadataStatuses: Array[FileStatus] = _ // Parquet footer cache. - var footers: Map[FileStatus, Footer] = _ + var footers: Map[Path, Footer] = _ // `FileStatus` objects of all data files (Parquet part-files). var dataStatuses: Array[FileStatus] = _ @@ -349,35 +345,30 @@ private[sql] class ParquetRelation2( * Refreshes `FileStatus`es, footers, partition spec, and table schema. */ def refresh(): Unit = { - // Support either reading a collection of raw Parquet part-files, or a collection of folders - // containing Parquet files (e.g. partitioned Parquet table). - val baseStatuses = paths.distinct.flatMap { p => - val path = new Path(p) - val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) - val qualified = path.makeQualified(fs.getUri, fs.getWorkingDirectory) - Try(fs.getFileStatus(qualified)).toOption - } - assert(baseStatuses.forall(!_.isDir) || baseStatuses.forall(_.isDir)) - // Lists `FileStatus`es of all leaf nodes (files) under all base directories. 
- val leaves = baseStatuses.flatMap { f => - val fs = FileSystem.get(f.getPath.toUri, SparkHadoopUtil.get.conf) - SparkHadoopUtil.get.listLeafStatuses(fs, f.getPath).filter { f => - isSummaryFile(f.getPath) || - !(f.getPath.getName.startsWith("_") || f.getPath.getName.startsWith(".")) - } - } + val leaves = cachedLeafStatuses().filter { f => + isSummaryFile(f.getPath) || + !(f.getPath.getName.startsWith("_") || f.getPath.getName.startsWith(".")) + }.toArray dataStatuses = leaves.filterNot(f => isSummaryFile(f.getPath)) metadataStatuses = leaves.filter(_.getPath.getName == ParquetFileWriter.PARQUET_METADATA_FILE) commonMetadataStatuses = leaves.filter(_.getPath.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE) - footers = (dataStatuses ++ metadataStatuses ++ commonMetadataStatuses).par.map { f => - val parquetMetadata = ParquetFileReader.readFooter( - SparkHadoopUtil.get.conf, f, ParquetMetadataConverter.NO_FILTER) - f -> new Footer(f.getPath, parquetMetadata) - }.seq.toMap + footers = { + val conf = SparkHadoopUtil.get.conf + val taskSideMetaData = conf.getBoolean(ParquetInputFormat.TASK_SIDE_METADATA, true) + val rawFooters = if (shouldMergeSchemas) { + ParquetFileReader.readAllFootersInParallel( + conf, seqAsJavaList(leaves), taskSideMetaData) + } else { + ParquetFileReader.readAllFootersInParallelUsingSummaryFiles( + conf, seqAsJavaList(leaves), taskSideMetaData) + } + + rawFooters.map(footer => footer.getFile -> footer).toMap + } // If we already get the schema, don't need to re-compute it since the schema merging is // time-consuming. @@ -448,7 +439,7 @@ private[sql] class ParquetRelation2( "No schema defined, " + s"and no Parquet data file or summary file found under ${paths.mkString(", ")}.") - ParquetRelation2.readSchema(filesToTouch.map(footers.apply), sqlContext) + ParquetRelation2.readSchema(filesToTouch.map(f => footers.apply(f.getPath)), sqlContext) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala index e6324b20b3065..1615a6dcbdb2a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala @@ -17,20 +17,16 @@ package org.apache.spark.sql.sources -import org.apache.hadoop.fs.Path - import org.apache.spark.Logging -import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.rdd.{UnionRDD, RDD} -import org.apache.spark.sql.Row +import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.types.{StructType, UTF8String, StringType} -import org.apache.spark.sql._ +import org.apache.spark.sql.types.{StringType, StructType, UTF8String} +import org.apache.spark.sql.{SaveMode, Strategy, execution, sources} /** * A Strategy for planning scans over data sources defined using the sources API. 
@@ -58,7 +54,7 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { filters, (a, _) => t.buildScan(a)) :: Nil - // Scanning partitioned FSBasedRelation + // Scanning partitioned HadoopFsRelation case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: HadoopFsRelation)) if t.partitionSpec.partitionColumns.nonEmpty => val selectedPartitions = prunePartitions(filters, t.partitionSpec).toArray @@ -86,22 +82,13 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { t.partitionSpec.partitionColumns, selectedPartitions) :: Nil - // Scanning non-partitioned FSBasedRelation + // Scanning non-partitioned HadoopFsRelation case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: HadoopFsRelation)) => - val inputPaths = t.paths.map(new Path(_)).flatMap { path => - val fs = path.getFileSystem(t.sqlContext.sparkContext.hadoopConfiguration) - val qualifiedPath = path.makeQualified(fs.getUri, fs.getWorkingDirectory) - SparkHadoopUtil.get.listLeafStatuses(fs, qualifiedPath).map(_.getPath).filterNot { path => - val name = path.getName - name.startsWith("_") || name.startsWith(".") - }.map(fs.makeQualified(_).toString) - } - pruneFilterProject( l, projectList, filters, - (a, f) => t.buildScan(a, f, inputPaths)) :: Nil + (a, f) => t.buildScan(a, f, t.paths)) :: Nil case l @ LogicalRelation(t: TableScan) => createPhysicalRDD(l.relation, l.output, t.buildScan()) :: Nil @@ -130,16 +117,6 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { // Builds RDD[Row]s for each selected partition. val perPartitionRows = partitions.map { case Partition(partitionValues, dir) => - // Paths to all data files within this partition - val dataFilePaths = { - val dirPath = new Path(dir) - val fs = dirPath.getFileSystem(SparkHadoopUtil.get.conf) - fs.listStatus(dirPath).map(_.getPath).filterNot { path => - val name = path.getName - name.startsWith("_") || name.startsWith(".") - }.map(fs.makeQualified(_).toString) - } - // The table scan operator (PhysicalRDD) which retrieves required columns from data files. // Notice that the schema of data files, represented by `relation.dataSchema`, may contain // some partition column(s). @@ -155,7 +132,7 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { // assuming partition columns data stored in data files are always consistent with those // partition values encoded in partition directory paths. val nonPartitionColumns = requiredColumns.filterNot(partitionColNames.contains) - val dataRows = relation.buildScan(nonPartitionColumns, filters, dataFilePaths) + val dataRows = relation.buildScan(nonPartitionColumns, filters, Array(dir)) // Merges data values with partition values. 
mergeWithPartitionValues( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index a82a6758d2537..9b52d1be3df2d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -17,14 +17,14 @@ package org.apache.spark.sql.sources +import scala.collection.mutable import scala.util.Try import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.annotation.{DeveloperApi, Experimental} -import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions._ @@ -368,18 +368,61 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio private var _partitionSpec: PartitionSpec = _ + private class FileStatusCache { + var leafFiles = mutable.Map.empty[Path, FileStatus] + + var leafDirToChildrenFiles = mutable.Map.empty[Path, Array[FileStatus]] + + var leafDirs = mutable.Map.empty[Path, FileStatus] + + def refresh(): Unit = { + def listLeafFilesAndDirs(fs: FileSystem, status: FileStatus): Set[FileStatus] = { + val (dirs, files) = fs.listStatus(status.getPath).partition(_.isDir) + val leafDirs = if (dirs.isEmpty) Set(status) else Set.empty[FileStatus] + files.toSet ++ leafDirs ++ dirs.flatMap(dir => listLeafFilesAndDirs(fs, dir)) + } + + leafDirs.clear() + leafFiles.clear() + + // We don't filter files/directories like _temporary/_SUCCESS here, as specific data sources + // may take advantages over them (e.g. Parquet _metadata and _common_metadata files). 
+ val statuses = paths.flatMap { path => + val hdfsPath = new Path(path) + val fs = hdfsPath.getFileSystem(hadoopConf) + val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory) + Try(fs.getFileStatus(qualified)).toOption.toArray.flatMap(listLeafFilesAndDirs(fs, _)) + } + + val (dirs, files) = statuses.partition(_.isDir) + leafDirs ++= dirs.map(d => d.getPath -> d).toMap + leafFiles ++= files.map(f => f.getPath -> f).toMap + leafDirToChildrenFiles ++= files.groupBy(_.getPath.getParent) + } + } + + private lazy val fileStatusCache = { + val cache = new FileStatusCache + cache.refresh() + cache + } + + protected def cachedLeafStatuses(): Set[FileStatus] = { + fileStatusCache.leafFiles.values.toSet + } + final private[sql] def partitionSpec: PartitionSpec = { if (_partitionSpec == null) { _partitionSpec = maybePartitionSpec .map(spec => spec.copy(partitionColumns = spec.partitionColumns.asNullable)) .orElse(userDefinedPartitionColumns.map(PartitionSpec(_, Array.empty[Partition]))) .getOrElse { - if (sqlContext.conf.partitionDiscoveryEnabled()) { - discoverPartitions() - } else { - PartitionSpec(StructType(Nil), Array.empty[Partition]) + if (sqlContext.conf.partitionDiscoveryEnabled()) { + discoverPartitions() + } else { + PartitionSpec(StructType(Nil), Array.empty[Partition]) + } } - } } _partitionSpec } @@ -409,20 +452,14 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio def userDefinedPartitionColumns: Option[StructType] = None private[sql] def refresh(): Unit = { + fileStatusCache.refresh() if (sqlContext.conf.partitionDiscoveryEnabled()) { _partitionSpec = discoverPartitions() } } private def discoverPartitions(): PartitionSpec = { - val basePaths = paths.map(new Path(_)) - val leafDirs = basePaths.flatMap { path => - val fs = path.getFileSystem(hadoopConf) - Try(fs.getFileStatus(path.makeQualified(fs.getUri, fs.getWorkingDirectory))) - .filter(_.isDir) - .map(SparkHadoopUtil.get.listLeafDirStatuses(fs, _)) - .getOrElse(Seq.empty[FileStatus]) - }.map(_.getPath) + val leafDirs = fileStatusCache.leafDirs.keys.toSeq if (leafDirs.nonEmpty) { PartitioningUtils.parsePartitions(leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME) @@ -444,6 +481,27 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio }) } + private[sources] final def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputPaths: Array[String]): RDD[Row] = { + val inputStatuses = inputPaths.flatMap { input => + val path = new Path(input) + + // First assumes `input` is a directory path, and tries to get all files contained in it. + fileStatusCache.leafDirToChildrenFiles.getOrElse( + path, + // Otherwise, `input` might be a file path + fileStatusCache.leafFiles.get(path).toArray + ).filter { status => + val name = status.getPath.getName + !name.startsWith("_") && !name.startsWith(".") + } + } + + buildScan(requiredColumns, filters, inputStatuses) + } + /** * Specifies schema of actual data files. For partitioned relations, if one or more partitioned * columns are contained in the data files, they should also appear in `dataSchema`. @@ -457,13 +515,13 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio * this relation. For partitioned relations, this method is called for each selected partition, * and builds an `RDD[Row]` containing all rows within that single partition. 
* - * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the + * @param inputFiles For a non-partitioned relation, it contains paths of all data files in the * relation. For a partitioned relation, it contains paths of all data files in a single * selected partition. * * @since 1.4.0 */ - def buildScan(inputPaths: Array[String]): RDD[Row] = { + def buildScan(inputFiles: Array[FileStatus]): RDD[Row] = { throw new UnsupportedOperationException( "At least one buildScan() method should be overridden to read the relation.") } @@ -474,13 +532,13 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio * and builds an `RDD[Row]` containing all rows within that single partition. * * @param requiredColumns Required columns. - * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the + * @param inputFiles For a non-partitioned relation, it contains paths of all data files in the * relation. For a partitioned relation, it contains paths of all data files in a single * selected partition. * * @since 1.4.0 */ - def buildScan(requiredColumns: Array[String], inputPaths: Array[String]): RDD[Row] = { + def buildScan(requiredColumns: Array[String], inputFiles: Array[FileStatus]): RDD[Row] = { // Yeah, to workaround serialization... val dataSchema = this.dataSchema val codegenEnabled = this.codegenEnabled @@ -490,7 +548,7 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio BoundReference(dataSchema.fieldIndex(col), field.dataType, field.nullable) }.toSeq - buildScan(inputPaths).mapPartitions { rows => + buildScan(inputFiles).mapPartitions { rows => val buildProjection = if (codegenEnabled) { GenerateMutableProjection.generate(requiredOutput, dataSchema.toAttributes) } else { @@ -512,7 +570,7 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio * of all `filters`. The pushed down filters are currently purely an optimization as they * will all be evaluated again. This means it is safe to use them with methods that produce * false positives such as filtering partitions based on a bloom filter. - * @param inputPaths For a non-partitioned relation, it contains paths of all data files in the + * @param inputFiles For a non-partitioned relation, it contains paths of all data files in the * relation. For a partitioned relation, it contains paths of all data files in a single * selected partition. 
* @@ -521,8 +579,8 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio def buildScan( requiredColumns: Array[String], filters: Array[Filter], - inputPaths: Array[String]): RDD[Row] = { - buildScan(requiredColumns, inputPaths) + inputFiles: Array[FileStatus]): RDD[Row] = { + buildScan(requiredColumns, inputFiles) } /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala index 29b21586f9c2a..09eed6646c55a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala @@ -21,7 +21,7 @@ import java.text.NumberFormat import java.util.UUID import com.google.common.base.Objects -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.io.{NullWritable, Text} import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat, TextOutputFormat} import org.apache.hadoop.mapreduce.{Job, RecordWriter, TaskAttemptContext} @@ -101,10 +101,10 @@ class SimpleTextRelation( override def hashCode(): Int = Objects.hashCode(paths, maybeDataSchema, dataSchema) - override def buildScan(inputPaths: Array[String]): RDD[Row] = { + override def buildScan(inputStatuses: Array[FileStatus]): RDD[Row] = { val fields = dataSchema.map(_.dataType) - sparkContext.textFile(inputPaths.mkString(",")).map { record => + sparkContext.textFile(inputStatuses.map(_.getPath).mkString(",")).map { record => Row(record.split(",").zip(fields).map { case (value, dataType) => Cast(Literal(value), dataType).eval() }: _*) From 32fbd297dd651ba3ce4ce52aeb0488233149cdf9 Mon Sep 17 00:00:00 2001 From: Davies Liu <davies@databricks.com> Date: Mon, 18 May 2015 12:55:13 -0700 Subject: [PATCH 240/320] [SPARK-6216] [PYSPARK] check python version of worker with driver This PR revert #5404, change to pass the version of python in driver into JVM, check it in worker before deserializing closure, then it can works with different major version of Python. 
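The handshake itself is small: the driver writes its `major.minor` version string into the worker's stream ahead of the serialized command, and the worker compares it against its own interpreter before unpickling anything. A minimal, self-contained sketch of that idea (an illustration only, not the actual PySpark framing code, which lives in `PythonRDD.scala` and `worker.py` in the diff below):

```python
import pickle
import sys


def greet():
    # A module-level function stands in for the user's command; the real code
    # serializes arbitrary closures with cloudpickle.
    return "ok"


def driver_payload(func):
    # Driver side: prepend the driver's major.minor Python version to the pickled command.
    version = "%d.%d" % sys.version_info[:2]
    return version.encode("utf-8") + b"\n" + pickle.dumps(func)


def worker_run(payload):
    # Worker side: validate the driver's version *before* deserializing the command.
    header, pickled = payload.split(b"\n", 1)
    driver_version = header.decode("utf-8")
    worker_version = "%d.%d" % sys.version_info[:2]
    if driver_version != worker_version:
        raise Exception("Python in worker has different version %s than that in "
                        "driver %s, PySpark cannot run with different minor versions"
                        % (worker_version, driver_version))
    return pickle.loads(pickled)()


if __name__ == "__main__":
    print(worker_run(driver_payload(greet)))  # prints "ok" when versions match
```

Comparing only `major.minor` tolerates patch-level differences between driver and worker while still rejecting 2.x/3.x and minor-version mismatches.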
Author: Davies Liu <davies@databricks.com> Closes #6203 from davies/py_version and squashes the following commits: b8fb76e [Davies Liu] fix test 6ce5096 [Davies Liu] use string for version 47c6278 [Davies Liu] check python version of worker with driver --- .../org/apache/spark/api/python/PythonRDD.scala | 3 +++ python/pyspark/context.py | 1 + python/pyspark/rdd.py | 4 ++-- python/pyspark/sql/context.py | 1 + python/pyspark/sql/functions.py | 4 ++-- python/pyspark/tests.py | 6 +++--- python/pyspark/worker.py | 12 +++++++----- .../scala/org/apache/spark/sql/UDFRegistration.scala | 2 ++ .../org/apache/spark/sql/UserDefinedFunction.scala | 5 +++-- .../org/apache/spark/sql/execution/pythonUdfs.scala | 2 ++ 10 files changed, 26 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 7409dc2d866f6..2d92f6a42b308 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -47,6 +47,7 @@ private[spark] class PythonRDD( pythonIncludes: JList[String], preservePartitoning: Boolean, pythonExec: String, + pythonVer: String, broadcastVars: JList[Broadcast[PythonBroadcast]], accumulator: Accumulator[JList[Array[Byte]]]) extends RDD[Array[Byte]](parent) { @@ -210,6 +211,8 @@ private[spark] class PythonRDD( val dataOut = new DataOutputStream(stream) // Partition index dataOut.writeInt(split.index) + // Python version of driver + PythonRDD.writeUTF(pythonVer, dataOut) // sparkFilesDir PythonRDD.writeUTF(SparkFiles.getRootDirectory, dataOut) // Python includes (*.zip and *.egg files) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 31992795a9e45..d25ee855235be 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -173,6 +173,7 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, self._jvm.PythonAccumulatorParam(host, port)) self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') + self.pythonVer = "%d.%d" % sys.version_info[:2] # Broadcast's __reduce__ method stores Broadcast instances here. 
# This allows other code to determine which Broadcast instances have diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 545c5ad20cb96..70db4bbe4cbc5 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -2260,7 +2260,7 @@ def toLocalIterator(self): def _prepare_for_python_RDD(sc, command, obj=None): # the serialized command will be compressed by broadcast ser = CloudPickleSerializer() - pickled_command = ser.dumps((command, sys.version_info[:2])) + pickled_command = ser.dumps(command) if len(pickled_command) > (1 << 20): # 1M # The broadcast will have same life cycle as created PythonRDD broadcast = sc.broadcast(pickled_command) @@ -2344,7 +2344,7 @@ def _jrdd(self): python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), bytearray(pickled_cmd), env, includes, self.preservesPartitioning, - self.ctx.pythonExec, + self.ctx.pythonExec, self.ctx.pythonVer, bvars, self.ctx._javaAccumulator) self._jrdd_val = python_rdd.asJavaRDD() diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index f6f107ca32d2f..0bde7191242ab 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -157,6 +157,7 @@ def registerFunction(self, name, f, returnType=StringType()): env, includes, self._sc.pythonExec, + self._sc.pythonVer, bvars, self._sc._javaAccumulator, returnType.json()) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 8d0e766ecd3b4..fbe9bf5b526af 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -353,8 +353,8 @@ def _create_judf(self): ssql_ctx = sc._jvm.SQLContext(sc._jsc.sc()) jdt = ssql_ctx.parseDataType(self.returnType.json()) fname = f.__name__ if hasattr(f, '__name__') else f.__class__.__name__ - judf = sc._jvm.UserDefinedPythonFunction(fname, bytearray(pickled_command), env, - includes, sc.pythonExec, broadcast_vars, + judf = sc._jvm.UserDefinedPythonFunction(fname, bytearray(pickled_command), env, includes, + sc.pythonExec, sc.pythonVer, broadcast_vars, sc._javaAccumulator, jdt) return judf diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 09de4d159fdcf..5e023f6c53517 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -1543,13 +1543,13 @@ def count(): def test_with_different_versions_of_python(self): rdd = self.sc.parallelize(range(10)) rdd.count() - version = sys.version_info - sys.version_info = (2, 0, 0) + version = self.sc.pythonVer + self.sc.pythonVer = "2.0" try: with QuietTest(self.sc): self.assertRaises(Py4JJavaError, lambda: rdd.count()) finally: - sys.version_info = version + self.sc.pythonVer = version class SparkSubmitTests(unittest.TestCase): diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index fbdaf3a5814cd..93df9002be377 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -57,6 +57,12 @@ def main(infile, outfile): if split_index == -1: # for unit tests exit(-1) + version = utf8_deserializer.loads(infile) + if version != "%d.%d" % sys.version_info[:2]: + raise Exception(("Python in worker has different version %s than that in " + + "driver %s, PySpark cannot run with different minor versions") % + ("%d.%d" % sys.version_info[:2], version)) + # initialize global state shuffle.MemoryBytesSpilled = 0 shuffle.DiskBytesSpilled = 0 @@ -92,11 +98,7 @@ def main(infile, outfile): command = pickleSer._read_with_length(infile) if isinstance(command, Broadcast): command = pickleSer.loads(command.value) - (func, profiler, deserializer, serializer), version = command - if 
version != sys.version_info[:2]: - raise Exception(("Python in worker has different version %s than that in " + - "driver %s, PySpark cannot run with different minor versions") % - (sys.version_info[:2], version)) + func, profiler, deserializer, serializer = command init_time = time.time() def process(): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index dc3389c41bbfa..3cc5c2441d8a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -46,6 +46,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { envVars: JMap[String, String], pythonIncludes: JList[String], pythonExec: String, + pythonVer: String, broadcastVars: JList[Broadcast[PythonBroadcast]], accumulator: Accumulator[JList[Array[Byte]]], stringDataType: String): Unit = { @@ -70,6 +71,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { envVars, pythonIncludes, pythonExec, + pythonVer, broadcastVars, accumulator, dataType, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala index 505ab1301ec96..a02e202d2eebc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala @@ -58,14 +58,15 @@ private[sql] case class UserDefinedPythonFunction( envVars: JMap[String, String], pythonIncludes: JList[String], pythonExec: String, + pythonVer: String, broadcastVars: JList[Broadcast[PythonBroadcast]], accumulator: Accumulator[JList[Array[Byte]]], dataType: DataType) { /** Returns a [[Column]] that will evaluate to calling this UDF with the given input. */ def apply(exprs: Column*): Column = { - val udf = PythonUDF(name, command, envVars, pythonIncludes, pythonExec, broadcastVars, - accumulator, dataType, exprs.map(_.expr)) + val udf = PythonUDF(name, command, envVars, pythonIncludes, pythonExec, pythonVer, + broadcastVars, accumulator, dataType, exprs.map(_.expr)) Column(udf) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala index 65dd7ba020fa3..11b2897f76786 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala @@ -46,6 +46,7 @@ private[spark] case class PythonUDF( envVars: JMap[String, String], pythonIncludes: JList[String], pythonExec: String, + pythonVer: String, broadcastVars: JList[Broadcast[PythonBroadcast]], accumulator: Accumulator[JList[Array[Byte]]], dataType: DataType, @@ -251,6 +252,7 @@ case class BatchPythonEvaluation(udf: PythonUDF, output: Seq[Attribute], child: udf.pythonIncludes, false, udf.pythonExec, + udf.pythonVer, udf.broadcastVars, udf.accumulator ).mapPartitions { iter => From 0b6f503d5337a8387c37cc2c8e544f67c68f7dad Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Mon, 18 May 2015 13:34:43 -0700 Subject: [PATCH 241/320] [SPARK-7658] [STREAMING] [WEBUI] Update the mouse behaviors for the timeline graphs 1. If the user click one point of a batch, scroll down to the corresponding batch row and highlight it. And recovery the batch row after 3 seconds if necessary. 2. Add "#batches" in the histogram graphs. 
![screen shot 2015-05-14 at 7 36 19 pm](https://cloud.githubusercontent.com/assets/1000778/7646108/84f4a014-fa73-11e4-8c13-1903d267e60f.png) ![screen shot 2015-05-14 at 7 36 53 pm](https://cloud.githubusercontent.com/assets/1000778/7646109/8b11154a-fa73-11e4-820b-8ece9fa6ee3e.png) ![screen shot 2015-05-14 at 7 36 34 pm](https://cloud.githubusercontent.com/assets/1000778/7646111/93828272-fa73-11e4-89f8-580670144d3c.png) Author: zsxwing <zsxwing@gmail.com> Closes #6168 from zsxwing/SPARK-7658 and squashes the following commits: c242b00 [zsxwing] Change 5 seconds to 3 seconds 31fd0aa [zsxwing] Remove the mouseover highlight feature 06c6f6f [zsxwing] Merge branch 'master' into SPARK-7658 2eaff06 [zsxwing] Merge branch 'master' into SPARK-7658 108d56c [zsxwing] Update the mouse behaviors for the timeline graphs --- .../streaming/ui/static/streaming-page.css | 4 ++ .../streaming/ui/static/streaming-page.js | 42 ++++++++++++++++++- .../spark/streaming/ui/AllBatchesTable.scala | 3 +- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css index 19abe889ad3c1..b22c884bfebdb 100644 --- a/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css +++ b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css @@ -60,3 +60,7 @@ span.expand-input-rate { cursor: pointer; } + +tr.batch-table-cell-highlight > td { + background-color: #D6FFE4 !important; +} diff --git a/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js index 0ee6752b29e9a..75251f493ad22 100644 --- a/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js +++ b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js @@ -146,6 +146,12 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) { .attr("class", "line") .attr("d", line); + // If the user click one point in the graphs, jump to the batch row and highlight it. And + // recovery the batch row after 3 seconds if necessary. + // We need to remember the last clicked batch so that we can recovery it. + var lastClickedBatch = null; + var lastTimeout = null; + // Add points to the line. However, we make it invisible at first. But when the user moves mouse // over a point, it will be displayed with its detail. 
svg.selectAll(".point") @@ -154,6 +160,7 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) { .attr("stroke", "white") // white and opacity = 0 make it invisible .attr("fill", "white") .attr("opacity", "0") + .style("cursor", "pointer") .attr("cx", function(d) { return x(d.x); }) .attr("cy", function(d) { return y(d.y); }) .attr("r", function(d) { return 3; }) @@ -175,7 +182,29 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) { .attr("opacity", "0"); }) .on("click", function(d) { - window.location.href = "batch/?id=" + d.x; + if (lastTimeout != null) { + window.clearTimeout(lastTimeout); + } + if (lastClickedBatch != null) { + clearBatchRow(lastClickedBatch); + lastClickedBatch = null; + } + lastClickedBatch = d.x; + highlightBatchRow(lastClickedBatch) + lastTimeout = window.setTimeout(function () { + lastTimeout = null; + if (lastClickedBatch != null) { + clearBatchRow(lastClickedBatch); + lastClickedBatch = null; + } + }, 3000); // Clean up after 3 seconds + + var batchSelector = $("#batch-" + d.x); + var topOffset = batchSelector.offset().top - 15; + if (topOffset < 0) { + topOffset = 0; + } + $('html,body').animate({scrollTop: topOffset}, 200); }); } @@ -218,6 +247,9 @@ function drawHistogram(id, values, minY, maxY, unitY, batchInterval) { svg.append("g") .attr("class", "x axis") .call(xAxis) + .append("text") + .attr("transform", "translate(" + (margin.left + width - 40) + ", 15)") + .text("#batches"); svg.append("g") .attr("class", "y axis") @@ -279,3 +311,11 @@ $(function() { $(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed'); } }); + +function highlightBatchRow(batch) { + $("#batch-" + batch).parent().addClass("batch-table-cell-highlight"); +} + +function clearBatchRow(batch) { + $("#batch-" + batch).parent().removeClass("batch-table-cell-highlight"); +} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala index 00cc47d6a3ca5..f702bd5bc9466 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala @@ -44,8 +44,9 @@ private[ui] abstract class BatchTableBase(tableId: String, batchInterval: Long) val formattedSchedulingDelay = schedulingDelay.map(SparkUIUtils.formatDuration).getOrElse("-") val processingTime = batch.processingDelay val formattedProcessingTime = processingTime.map(SparkUIUtils.formatDuration).getOrElse("-") + val batchTimeId = s"batch-$batchTime" - <td sorttable_customkey={batchTime.toString}> + <td id={batchTimeId} sorttable_customkey={batchTime.toString}> <a href={s"batch?id=$batchTime"}> {formattedBatchTime} </a> From fcf90b75ccf222bd2f1939addc3f8f052d2bd3ff Mon Sep 17 00:00:00 2001 From: Michael Armbrust <michael@databricks.com> Date: Mon, 18 May 2015 14:04:04 -0700 Subject: [PATCH 242/320] [HOTFIX] Fix ORC build break Fix break caused by merging #6225 and #6194. 
Author: Michael Armbrust <michael@databricks.com> Closes #6244 from marmbrus/fixOrcBuildBreak and squashes the following commits: b10e47b [Michael Armbrust] [HOTFIX] Fix ORC Build break --- .../org/apache/spark/sql/hive/orc/OrcRelation.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala index 9708199f07349..e10d3a0b6846c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.hive.orc import java.util.{Objects, Properties} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.ql.io.orc.{OrcInputFormat, OrcOutputFormat, OrcSerde, OrcSplit} import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector @@ -171,9 +171,10 @@ private[sql] case class OrcRelation( maybePartitionSpec) } - override def buildScan(requiredColumns: Array[String], + override def buildScan( + requiredColumns: Array[String], filters: Array[Filter], - inputPaths: Array[String]): RDD[Row] = { + inputPaths: Array[FileStatus]): RDD[Row] = { val output = StructType(requiredColumns.map(dataSchema(_))).toAttributes OrcTableScan(output, this, filters, inputPaths).execute() } @@ -194,7 +195,7 @@ private[orc] case class OrcTableScan( attributes: Seq[Attribute], @transient relation: OrcRelation, filters: Array[Filter], - inputPaths: Array[String]) + inputPaths: Array[FileStatus]) extends Logging with HiveInspectors { @@ -256,7 +257,7 @@ private[orc] case class OrcTableScan( addColumnIds(attributes, relation, conf) if (inputPaths.nonEmpty) { - FileInputFormat.setInputPaths(job, inputPaths.map(new Path(_)): _*) + FileInputFormat.setInputPaths(job, inputPaths.map(_.getPath): _*) } val inputFormatClass = From b93c97d79b42a06b48d2a8d98beccc636442541e Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Mon, 18 May 2015 14:33:33 -0700 Subject: [PATCH 243/320] [SPARK-7501] [STREAMING] DAG visualization: show DStream operations This is similar to #5999, but for streaming. Roughly 200 lines are tests. One thing to note here is that we already do some kind of scoping thing for call sites, so this patch adds the new RDD operation scoping logic in the same place. Also, this patch adds a `try finally` block to set the relevant variables in a safer way. 
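The `try finally` mentioned in the description is the crux of the scoping change: the scope marker must be set before the wrapped operation runs and restored afterwards even if the body throws, otherwise a failed operation would leak its scope into unrelated DStream/RDD calls. A minimal Scala sketch of that pattern, using a plain thread-local as a stand-in for SparkContext's local properties (names here are illustrative, not Spark's internals):

```scala
object ScopeSketch {
  // Stand-in for the per-thread scope property that the DAG visualization reads.
  private val currentScope = new ThreadLocal[Option[String]] {
    override def initialValue(): Option[String] = None
  }

  def withScope[T](name: String)(body: => T): T = {
    val previous = currentScope.get()  // remember the enclosing scope, if any
    currentScope.set(Some(name))       // enter the new scope for this operation
    try {
      body                             // run the wrapped DStream/RDD operation
    } finally {
      currentScope.set(previous)       // always restore, even when body throws
    }
  }
}
```

A DStream operation such as `map` would then wrap its body in `withScope("map") { ... }`, mirroring the call-site scoping that already exists on the RDD side.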
tdas zsxwing ------------------------ **Before** <img src="https://cloud.githubusercontent.com/assets/2133137/7625996/d88211b8-f9b4-11e4-90b9-e11baa52d6d7.png" width="450px"/> -------------------------- **After** <img src="https://cloud.githubusercontent.com/assets/2133137/7625997/e0878f8c-f9b4-11e4-8df3-7dd611b13c87.png" width="650px"/> Author: Andrew Or <andrew@databricks.com> Closes #6034 from andrewor14/dag-viz-streaming and squashes the following commits: 932a64a [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-streaming e685df9 [Andrew Or] Rename createRDDWith 84d0656 [Andrew Or] Review feedback 697c086 [Andrew Or] Fix tests 53b9936 [Andrew Or] Set scopes for foreachRDD properly 1881802 [Andrew Or] Refactor DStream scope names again af4ba8d [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-streaming fd07d22 [Andrew Or] Make MQTT lower case f6de871 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-streaming 0ca1801 [Andrew Or] Remove a few unnecessary withScopes on aliases fa4e5fb [Andrew Or] Pass in input stream name rather than defining it from within 1af0b0e [Andrew Or] Fix style 074c00b [Andrew Or] Review comments d25a324 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-streaming e4a93ac [Andrew Or] Fix tests? 25416dc [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-streaming 9113183 [Andrew Or] Add tests for DStream scopes b3806ab [Andrew Or] Fix test bb80bbb [Andrew Or] Fix MIMA? 5c30360 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-streaming 5703939 [Andrew Or] Rename operations that create InputDStreams 7c4513d [Andrew Or] Group RDDs by DStream operations and batches bf0ab6e [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-streaming 05c2676 [Andrew Or] Wrap many more methods in withScope c121047 [Andrew Or] Merge branch 'master' of github.com:apache/spark into dag-viz-streaming 65ef3e9 [Andrew Or] Fix NPE a0d3263 [Andrew Or] Scope streaming operations instead of RDD operations --- .../apache/spark/ui/static/dagre-d3.min.js | 2 +- .../scala/org/apache/spark/SparkContext.scala | 2 +- .../apache/spark/rdd/RDDOperationScope.scala | 24 ++- .../spark/ui/scope/RDDOperationGraph.scala | 6 +- .../spark/rdd/RDDOperationScopeSuite.scala | 12 +- .../kafka/DirectKafkaInputDStream.scala | 3 + .../spark/streaming/kafka/KafkaUtils.scala | 17 +- .../streaming/mqtt/MQTTInputDStream.scala | 3 +- .../spark/streaming/StreamingContext.scala | 48 +++-- .../spark/streaming/dstream/DStream.scala | 177 +++++++++++----- .../streaming/dstream/ForEachDStream.scala | 2 +- .../streaming/dstream/InputDStream.scala | 32 ++- .../dstream/PairDStreamFunctions.scala | 111 +++++----- .../spark/streaming/DStreamScopeSuite.scala | 190 ++++++++++++++++++ 14 files changed, 484 insertions(+), 145 deletions(-) create mode 100644 streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala diff --git a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js index c55f752620dfd..2d9262b972a59 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js +++ b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js @@ -20,7 +20,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ -module.exports={graphlib:require("./lib/graphlib"),dagre:require("./lib/dagre"),intersect:require("./lib/intersect"),render:require("./lib/render"),util:require("./lib/util"),version:require("./lib/version")}},{"./lib/dagre":8,"./lib/graphlib":9,"./lib/intersect":10,"./lib/render":23,"./lib/util":25,"./lib/version":26}],2:[function(require,module,exports){var util=require("./util");module.exports={"default":normal,normal:normal,vee:vee,undirected:undirected};function normal(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function vee(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 L 4 5 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function undirected(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 5 L 10 5").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}},{"./util":25}],3:[function(require,module,exports){var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util");module.exports=createClusters;function createClusters(selection,g){var clusters=g.nodes().filter(function(v){return util.isSubgraph(g,v)}),svgClusters=selection.selectAll("g.cluster").data(clusters,function(v){return v});var makeClusterIdentifier=function(v){return"cluster_"+v.replace(/^cluster/,"")};svgClusters.enter().append("g").attr("class",makeClusterIdentifier).attr("name",function(v){return g.node(v).label}).classed("cluster",true).style("opacity",0).append("rect");var sortedClusters=util.orderByRank(g,svgClusters.data());for(var i=0;i<sortedClusters.length;i++){var v=sortedClusters[i];var node=g.node(v);if(node.label){var thisGroup=selection.select("g.cluster."+makeClusterIdentifier(v));labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),bbox=_.pick(labelDom.node().getBBox(),"width","height");node.paddingTop+=bbox.height;node.paddingTop+=util.getMaxChildPaddingTop(g,v)}}util.applyTransition(svgClusters.exit(),g).style("opacity",0).remove();util.applyTransition(svgClusters,g).style("opacity",1);util.applyTransition(svgClusters.selectAll("rect"),g).attr("width",function(v){var node=g.node(v);return node.width+node.paddingLeft+node.paddingRight}).attr("height",function(v){var node=g.node(v);return node.height+node.paddingTop+node.paddingBottom}).attr("x",function(v){var node=g.node(v);return node.x-node.width/2-node.paddingLeft}).attr("y",function(v){var node=g.node(v);return node.y-node.height/2-node.paddingTop});svgClusters.each(function(){var cluster=d3.select(this),label=cluster.select("g.label"),rect=cluster.select("rect"),bbox=label.node().getBBox(),labelW=bbox.width,labelH=bbox.height;var num=function(x){return 
parseFloat(x.toString().replace(/px$/,""))};var labelX=num(rect.attr("x"))+num(rect.attr("width"))-labelH/2-labelW/2;var labelY=num(rect.attr("y"))+labelH;label.attr("transform","translate("+labelX+","+labelY+")")})}},{"./label/add-label":18,"./lodash":20,"./util":25}],4:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createEdgeLabels;function createEdgeLabels(selection,g){var svgEdgeLabels=selection.selectAll("g.edgeLabel").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);svgEdgeLabels.selectAll("*").remove();svgEdgeLabels.enter().append("g").classed("edgeLabel",true).style("opacity",0);svgEdgeLabels.each(function(e){var edge=g.edge(e),label=addLabel(d3.select(this),g.edge(e),0,0).classed("label",true),bbox=label.node().getBBox();if(edge.labelId){label.attr("id",edge.labelId)}if(!_.has(edge,"width")){edge.width=bbox.width}if(!_.has(edge,"height")){edge.height=bbox.height}});util.applyTransition(svgEdgeLabels.exit(),g).style("opacity",0).remove();return svgEdgeLabels}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],5:[function(require,module,exports){"use strict";var _=require("./lodash"),intersectNode=require("./intersect/intersect-node"),util=require("./util"),d3=require("./d3");module.exports=createEdgePaths;function createEdgePaths(selection,g,arrows){var svgPaths=selection.selectAll("g.edgePath").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);enter(svgPaths,g);exit(svgPaths,g);util.applyTransition(svgPaths,g).style("opacity",1);svgPaths.each(function(e){var domEdge=d3.select(this);var edge=g.edge(e);edge.elem=this;if(edge.id){domEdge.attr("id",edge.id)}util.applyClass(domEdge,edge["class"],(domEdge.classed("update")?"update ":"")+"edgePath")});svgPaths.selectAll("path.path").each(function(e){var edge=g.edge(e);edge.arrowheadId=_.uniqueId("arrowhead");var domEdge=d3.select(this).attr("marker-end",function(){return"url(#"+edge.arrowheadId+")"}).style("fill","none");util.applyTransition(domEdge,g).attr("d",function(e){return calcPoints(g,e)});util.applyStyle(domEdge,edge.style)});svgPaths.selectAll("defs *").remove();svgPaths.selectAll("defs").each(function(e){var edge=g.edge(e),arrowhead=arrows[edge.arrowhead];arrowhead(d3.select(this),edge.arrowheadId,edge,"arrowhead")});return svgPaths}function calcPoints(g,e){var edge=g.edge(e),tail=g.node(e.v),head=g.node(e.w),points=edge.points.slice(1,edge.points.length-1);points.unshift(intersectNode(tail,points[0]));points.push(intersectNode(head,points[points.length-1]));return createLine(edge,points)}function createLine(edge,points){var line=d3.svg.line().x(function(d){return d.x}).y(function(d){return d.y});if(_.has(edge,"lineInterpolate")){line.interpolate(edge.lineInterpolate)}if(_.has(edge,"lineTension")){line.tension(Number(edge.lineTension))}return line(points)}function getCoords(elem){var bbox=elem.getBBox(),matrix=elem.getTransformToElement(elem.ownerSVGElement).translate(bbox.width/2,bbox.height/2);return{x:matrix.e,y:matrix.f}}function enter(svgPaths,g){var svgPathsEnter=svgPaths.enter().append("g").attr("class","edgePath").style("opacity",0);svgPathsEnter.append("path").attr("class","path").attr("d",function(e){var edge=g.edge(e),sourceElem=g.node(e.v).elem,points=_.range(edge.points.length).map(function(){return getCoords(sourceElem)});return createLine(edge,points)});svgPathsEnter.append("defs")}function exit(svgPaths,g){var 
svgPathExit=svgPaths.exit();util.applyTransition(svgPathExit,g).style("opacity",0).remove();util.applyTransition(svgPathExit.select("path.path"),g).attr("d",function(e){var source=g.node(e.v);if(source){var points=_.range(this.pathSegList.length).map(function(){return source});return createLine({},points)}else{return d3.select(this).attr("d")}})}},{"./d3":7,"./intersect/intersect-node":14,"./lodash":20,"./util":25}],6:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createNodes;function createNodes(selection,g,shapes){var simpleNodes=g.nodes().filter(function(v){return!util.isSubgraph(g,v)});var svgNodes=selection.selectAll("g.node").data(simpleNodes,function(v){return v}).classed("update",true);svgNodes.selectAll("*").remove();svgNodes.enter().append("g").attr("class",function(v){return"node_"+v}).attr("name",function(v){return g.node(v).label}).classed("node",true).style("opacity",0);svgNodes.each(function(v){var node=g.node(v),thisGroup=d3.select(this),labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),shape=shapes[node.shape],bbox=_.pick(labelDom.node().getBBox(),"width","height");node.elem=this;if(node.id){thisGroup.attr("id",node.id)}if(node.labelId){labelGroup.attr("id",node.labelId)}util.applyClass(thisGroup,node["class"],(thisGroup.classed("update")?"update ":"")+"node");if(_.has(node,"width")){bbox.width=node.width}if(_.has(node,"height")){bbox.height=node.height}bbox.width+=node.paddingLeft+node.paddingRight;bbox.height+=node.paddingTop+node.paddingBottom;labelGroup.attr("transform","translate("+(node.paddingLeft-node.paddingRight)/2+","+(node.paddingTop-node.paddingBottom)/2+")");var shapeSvg=shape(d3.select(this),bbox,node);util.applyStyle(shapeSvg,node.style);var requiredWidth=0,requiredHeight=0;var nextNode=g.node(g.parent(v));while(nextNode){var tempGroup=thisGroup.append("g");var tempLabel=addLabel(tempGroup,nextNode);var tempBBox=tempLabel.node().getBBox();tempBBox.width-=50;requiredWidth=Math.max(requiredWidth,tempBBox.width);requiredHeight=Math.max(requiredHeight,tempBBox.height);tempLabel.remove();nextNode=g.node(g.parent(nextNode.label))}var shapeBBox=shapeSvg.node().getBBox();shapeBBox.width=Math.max(shapeBBox.width,requiredWidth);shapeBBox.height=Math.max(shapeBBox.height,requiredHeight);node.width=shapeBBox.width;node.height=shapeBBox.height});util.applyTransition(svgNodes.exit(),g).style("opacity",0).remove();return svgNodes}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],7:[function(require,module,exports){module.exports=window.d3},{}],8:[function(require,module,exports){var dagre;if(require){try{dagre=require("dagre")}catch(e){}}if(!dagre){dagre=window.dagre}module.exports=dagre},{dagre:27}],9:[function(require,module,exports){var graphlib;if(require){try{graphlib=require("graphlib")}catch(e){}}if(!graphlib){graphlib=window.graphlib}module.exports=graphlib},{graphlib:57}],10:[function(require,module,exports){module.exports={node:require("./intersect-node"),circle:require("./intersect-circle"),ellipse:require("./intersect-ellipse"),polygon:require("./intersect-polygon"),rect:require("./intersect-rect")}},{"./intersect-circle":11,"./intersect-ellipse":12,"./intersect-node":14,"./intersect-polygon":15,"./intersect-rect":16}],11:[function(require,module,exports){var intersectEllipse=require("./intersect-ellipse");module.exports=intersectCircle;function intersectCircle(node,rx,point){return 
intersectEllipse(node,rx,rx,point)}},{"./intersect-ellipse":12}],12:[function(require,module,exports){module.exports=intersectEllipse;function intersectEllipse(node,rx,ry,point){var cx=node.x;var cy=node.y;var px=cx-point.x;var py=cy-point.y;var det=Math.sqrt(rx*rx*py*py+ry*ry*px*px);var dx=Math.abs(rx*ry*px/det);if(point.x<cx){dx=-dx}var dy=Math.abs(rx*ry*py/det);if(point.y<cy){dy=-dy}return{x:cx+dx,y:cy+dy}}},{}],13:[function(require,module,exports){module.exports=intersectLine;function intersectLine(p1,p2,q1,q2){var a1,a2,b1,b2,c1,c2;var r1,r2,r3,r4;var denom,offset,num;var x,y;a1=p2.y-p1.y;b1=p1.x-p2.x;c1=p2.x*p1.y-p1.x*p2.y;r3=a1*q1.x+b1*q1.y+c1;r4=a1*q2.x+b1*q2.y+c1;if(r3!==0&&r4!==0&&sameSign(r3,r4)){return}a2=q2.y-q1.y;b2=q1.x-q2.x;c2=q2.x*q1.y-q1.x*q2.y;r1=a2*p1.x+b2*p1.yy+c2;r2=a2*p2.x+b2*p2.y+c2;if(r1!==0&&r2!==0&&sameSign(r1,r2)){return}denom=a1*b2-a2*b1;if(denom===0){return}offset=Math.abs(denom/2);num=b1*c2-b2*c1;x=num<0?(num-offset)/denom:(num+offset)/denom;num=a2*c1-a1*c2;y=num<0?(num-offset)/denom:(num+offset)/denom;return{x:x,y:y}}function sameSign(r1,r2){return r1*r2>0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i<polyPoints.length;i++){var p1=polyPoints[i];var p2=polyPoints[i<polyPoints.length-1?i+1:0];var intersect=intersectLine(node,point,{x:left+p1.x,y:top+p1.y},{x:left+p2.x,y:top+p2.y});if(intersect){intersections.push(intersect)}}if(!intersections.length){console.log("NO INTERSECTION FOUND, RETURN NODE CENTER",node);return node}if(intersections.length>1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distp<distq?-1:distp===distq?0:1})}return intersections[0]}},{"./intersect-line":13}],16:[function(require,module,exports){module.exports=intersectRect;function intersectRect(node,point){var x=node.x;var y=node.y;var dx=point.x-x;var dy=point.y-y;var w=node.width/2;var h=node.height/2;var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof 
label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;i<lines.length;i++){domNode.append("tspan").attr("xml:space","preserve").attr("dy","1em").attr("x","1").text(lines[i])}util.applyStyle(domNode,node.labelStyle);return domNode}function processEscapeSequences(text){var newText="",escaped=false,ch;for(var i=0;i<text.length;++i){ch=text[i];if(escaped){switch(ch){case"n":newText+="\n";break;default:newText+=ch}escaped=false}else if(ch==="\\"){escaped=true}else{newText+=ch}}return newText}},{"../util":25}],20:[function(require,module,exports){var lodash;if(require){try{lodash=require("lodash")}catch(e){}}if(!lodash){lodash=window._}module.exports=lodash},{lodash:77}],21:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3"),_=require("./lodash");module.exports=positionEdgeLabels;function positionEdgeLabels(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(e){var edge=g.edge(e);return _.has(edge,"x")?"translate("+edge.x+","+edge.y+")":""}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./lodash":20,"./util":25}],22:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3");module.exports=positionNodes;function positionNodes(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(v){var node=g.node(v);return"translate("+node.x+","+node.y+")"}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./util":25}],23:[function(require,module,exports){var _=require("./lodash"),layout=require("./dagre").layout;module.exports=render;function render(){var createNodes=require("./create-nodes"),createClusters=require("./create-clusters"),createEdgeLabels=require("./create-edge-labels"),createEdgePaths=require("./create-edge-paths"),positionNodes=require("./position-nodes"),positionEdgeLabels=require("./position-edge-labels"),shapes=require("./shapes"),arrows=require("./arrows");var fn=function(svg,g){preProcessGraph(g);var outputGroup=createOrSelectGroup(svg,"output"),clustersGroup=createOrSelectGroup(outputGroup,"clusters"),edgePathsGroup=createOrSelectGroup(outputGroup,"edgePaths"),edgeLabels=createEdgeLabels(createOrSelectGroup(outputGroup,"edgeLabels"),g),nodes=createNodes(createOrSelectGroup(outputGroup,"nodes"),g,shapes);layout(g);positionNodes(nodes,g);positionEdgeLabels(edgeLabels,g);createEdgePaths(edgePathsGroup,g,arrows);createClusters(clustersGroup,g);postProcessGraph(g)};fn.createNodes=function(value){if(!arguments.length)return createNodes;createNodes=value;return fn};fn.createClusters=function(value){if(!arguments.length)return createClusters;createClusters=value;return fn};fn.createEdgeLabels=function(value){if(!arguments.length)return createEdgeLabels;createEdgeLabels=value;return fn};fn.createEdgePaths=function(value){if(!arguments.length)return 
createEdgePaths;createEdgePaths=value;return fn};fn.shapes=function(value){if(!arguments.length)return shapes;shapes=value;return fn};fn.arrows=function(value){if(!arguments.length)return arrows;arrows=value;return fn};return fn}var NODE_DEFAULT_ATTRS={paddingLeft:0,paddingRight:0,paddingTop:0,paddingBottom:0,rx:0,ry:0,shape:"rect"};var EDGE_DEFAULT_ATTRS={arrowhead:"normal",lineInterpolate:"linear"};function preProcessGraph(g){g.nodes().forEach(function(v){var node=g.node(v);if(!_.has(node,"label")){node.label=v}if(_.has(node,"paddingX")){_.defaults(node,{paddingLeft:node.paddingX,paddingRight:node.paddingX})}if(_.has(node,"paddingY")){_.defaults(node,{paddingTop:node.paddingY,paddingBottom:node.paddingY})}if(_.has(node,"padding")){_.defaults(node,{paddingLeft:node.padding,paddingRight:node.padding,paddingTop:node.padding,paddingBottom:node.padding})}if(_.has(node,"paddingLeft")){_.defaults(node,{paddingLeft:node.paddingLeft})}if(_.has(node,"paddingRight")){_.defaults(node,{paddingRight:node.paddingRight})}if(_.has(node,"paddingTop")){_.defaults(node,{paddingTop:node.paddingTop})}if(_.has(node,"paddingBottom")){_.defaults(node,{paddingBottom:node.paddingBottom})}_.defaults(node,NODE_DEFAULT_ATTRS);_.each(["paddingLeft","paddingRight","paddingTop","paddingBottom"],function(k){node[k]=Number(node[k])});if(_.has(node,"width")){node._prevWidth=node.width}if(_.has(node,"height")){node._prevHeight=node.height}});g.edges().forEach(function(e){var edge=g.edge(e);if(!_.has(edge,"label")){edge.label=""}_.defaults(edge,EDGE_DEFAULT_ATTRS)})}function postProcessGraph(g){_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"_prevWidth")){node.width=node._prevWidth}else{delete node.width}if(_.has(node,"_prevHeight")){node.height=node._prevHeight}else{delete node.height}delete node._prevWidth;delete node._prevHeight})}function createOrSelectGroup(root,name){var selection=root.select("g."+name);if(selection.empty()){selection=root.append("g").attr("class",name)}return selection}},{"./arrows":2,"./create-clusters":3,"./create-edge-labels":4,"./create-edge-paths":5,"./create-nodes":6,"./dagre":8,"./lodash":20,"./position-edge-labels":21,"./position-nodes":22,"./shapes":24}],24:[function(require,module,exports){"use strict";var intersectRect=require("./intersect/intersect-rect"),intersectEllipse=require("./intersect/intersect-ellipse"),intersectCircle=require("./intersect/intersect-circle"),intersectPolygon=require("./intersect/intersect-polygon");module.exports={rect:rect,ellipse:ellipse,circle:circle,diamond:diamond};function rect(parent,bbox,node){var shapeSvg=parent.insert("rect",":first-child").attr("rx",node.rx).attr("ry",node.ry).attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("width",bbox.width).attr("height",bbox.height);node.intersect=function(point){return intersectRect(node,point)};return shapeSvg}function ellipse(parent,bbox,node){var rx=bbox.width/2,ry=bbox.height/2,shapeSvg=parent.insert("ellipse",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("rx",rx).attr("ry",ry);node.intersect=function(point){return intersectEllipse(node,rx,ry,point)};return shapeSvg}function circle(parent,bbox,node){var r=Math.max(bbox.width,bbox.height)/2,shapeSvg=parent.insert("circle",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("r",r);node.intersect=function(point){return intersectCircle(node,r,point)};return shapeSvg}function diamond(parent,bbox,node){var 
w=bbox.width*Math.SQRT2/2,h=bbox.height*Math.SQRT2/2,points=[{x:0,y:-h},{x:-w,y:0},{x:0,y:h},{x:w,y:0}],shapeSvg=parent.insert("polygon",":first-child").attr("points",points.map(function(p){return p.x+","+p.y}).join(" "));node.intersect=function(p){return intersectPolygon(node,points,p)};return shapeSvg}},{"./intersect/intersect-circle":11,"./intersect/intersect-ellipse":12,"./intersect/intersect-polygon":15,"./intersect/intersect-rect":16}],25:[function(require,module,exports){var _=require("./lodash");module.exports={isSubgraph:isSubgraph,getMaxChildPaddingTop:getMaxChildPaddingTop,orderByRank:orderByRank,edgeToId:edgeToId,applyStyle:applyStyle,applyClass:applyClass,applyTransition:applyTransition};function isSubgraph(g,v){return!!g.children(v).length}function getMaxChildPaddingTop(g,v){var maxPadding=0;var children=g.children(v);for(var i=0;i<children.length;i++){var child=g.node(children[i]);if(child.paddingTop&&child.paddingTop>maxPadding){maxPadding=child.paddingTop}}return maxPadding}function getRank(g,v){var maxRank=0;var children=g.children(v);for(var i=0;i<children.length;i++){var thisRank=getRank(g,children[i])+1;if(thisRank>maxRank){maxRank=thisRank}}return maxRank}function orderByRank(g,nodes){return nodes.sort(function(x,y){return getRank(g,x)-getRank(g,y)})}function edgeToId(e){return escapeId(e.v)+":"+escapeId(e.w)+":"+escapeId(e.name)}var ID_DELIM=/:/g;function escapeId(str){return str?String(str).replace(ID_DELIM,"\\:"):""}function applyStyle(dom,styleFn){if(styleFn){dom.attr("style",styleFn)}}function applyClass(dom,classFn,otherClasses){if(classFn){dom.attr("class",classFn).attr("class",otherClasses+" "+dom.attr("class"))}}function applyTransition(selection,g){var graph=g.graph();if(_.isPlainObject(graph)){var transition=graph.transition;if(_.isFunction(transition)){return transition(selection)}}return selection}},{"./lodash":20}],26:[function(require,module,exports){module.exports="0.4.4-pre"},{}],27:[function(require,module,exports){module.exports={graphlib:require("./lib/graphlib"),layout:require("./lib/layout"),debug:require("./lib/debug"),util:{time:require("./lib/util").time,notime:require("./lib/util").notime},version:require("./lib/version")}},{"./lib/debug":32,"./lib/graphlib":33,"./lib/layout":35,"./lib/util":55,"./lib/version":56}],28:[function(require,module,exports){"use strict";var _=require("./lodash"),greedyFAS=require("./greedy-fas");module.exports={run:run,undo:undo};function run(g){var fas=g.graph().acyclicer==="greedy"?greedyFAS(g,weightFn(g)):dfsFAS(g);_.each(fas,function(e){var label=g.edge(e);g.removeEdge(e);label.forwardName=e.name;label.reversed=true;g.setEdge(e.w,e.v,label,_.uniqueId("rev"))});function weightFn(g){return function(e){return g.edge(e).weight}}}function dfsFAS(g){var fas=[],stack={},visited={};function dfs(v){if(_.has(visited,v)){return}visited[v]=true;stack[v]=true;_.each(g.outEdges(v),function(e){if(_.has(stack,e.w)){fas.push(e)}else{dfs(e.w)}});delete stack[v]}_.each(g.nodes(),dfs);return fas}function undo(g){_.each(g.edges(),function(e){var label=g.edge(e);if(label.reversed){g.removeEdge(e);var forwardName=label.forwardName;delete label.reversed;delete label.forwardName;g.setEdge(e.w,e.v,label,forwardName)}})}},{"./greedy-fas":34,"./lodash":36}],29:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports=addBorderSegments;function addBorderSegments(g){function dfs(v){var 
children=g.children(v),node=g.node(v);if(children.length){_.each(children,dfs)}if(_.has(node,"minRank")){node.borderLeft=[];node.borderRight=[];for(var rank=node.minRank,maxRank=node.maxRank+1;rank<maxRank;++rank){addBorderNode(g,"borderLeft","_bl",v,node,rank);addBorderNode(g,"borderRight","_br",v,node,rank)}}}_.each(g.children(),dfs)}function addBorderNode(g,prop,prefix,sg,sgNode,rank){var label={width:0,height:0,rank:rank},prev=sgNode[prop][rank-1],curr=util.addDummyNode(g,"border",label,prefix);sgNode[prop][rank]=curr;g.setParent(curr,sg);if(prev){g.setEdge(prev,curr,{weight:1})}}},{"./lodash":36,"./util":55}],30:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports={adjust:adjust,undo:undo};function adjust(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="lr"||rankDir==="rl"){swapWidthHeight(g)}}function undo(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="bt"||rankDir==="rl"){reverseY(g)}if(rankDir==="lr"||rankDir==="rl"){swapXY(g);swapWidthHeight(g)}}function swapWidthHeight(g){_.each(g.nodes(),function(v){swapWidthHeightOne(g.node(v))});_.each(g.edges(),function(e){swapWidthHeightOne(g.edge(e))})}function swapWidthHeightOne(attrs){var w=attrs.width;attrs.width=attrs.height;attrs.height=w}function reverseY(g){_.each(g.nodes(),function(v){reverseYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,reverseYOne);if(_.has(edge,"y")){reverseYOne(edge)}})}function reverseYOne(attrs){attrs.y=-attrs.y}function swapXY(g){_.each(g.nodes(),function(v){swapXYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,swapXYOne);if(_.has(edge,"x")){swapXYOne(edge)}})}function swapXYOne(attrs){var x=attrs.x;attrs.x=attrs.y;attrs.y=x}},{"./lodash":36}],31:[function(require,module,exports){module.exports=List;function List(){var sentinel={};sentinel._next=sentinel._prev=sentinel;this._sentinel=sentinel}List.prototype.dequeue=function(){var sentinel=this._sentinel,entry=sentinel._prev;if(entry!==sentinel){unlink(entry);return entry}};List.prototype.enqueue=function(entry){var sentinel=this._sentinel;if(entry._prev&&entry._next){unlink(entry)}entry._next=sentinel._next;sentinel._next._prev=entry;sentinel._next=entry;entry._prev=sentinel};List.prototype.toString=function(){var strs=[],sentinel=this._sentinel,curr=sentinel._prev;while(curr!==sentinel){strs.push(JSON.stringify(curr,filterOutLinks));curr=curr._prev}return"["+strs.join(", ")+"]"};function unlink(entry){entry._prev._next=entry._next;entry._next._prev=entry._prev;delete entry._next;delete entry._prev}function filterOutLinks(k,v){if(k!=="_next"&&k!=="_prev"){return v}}},{}],32:[function(require,module,exports){var _=require("./lodash"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports={debugOrdering:debugOrdering};function debugOrdering(g){var layerMatrix=util.buildLayerMatrix(g);var h=new Graph({compound:true,multigraph:true}).setGraph({});_.each(g.nodes(),function(v){h.setNode(v,{label:v});h.setParent(v,"layer"+g.node(v).rank)});_.each(g.edges(),function(e){h.setEdge(e.v,e.w,{},e.name)});_.each(layerMatrix,function(layer,i){var layerV="layer"+i;h.setNode(layerV,{rank:"same"});_.reduce(layer,function(u,v){h.setEdge(u,v,{style:"invis"});return v})});return h}},{"./graphlib":33,"./lodash":36,"./util":55}],33:[function(require,module,exports){module.exports=require(9)},{"/Users/andrew/Documents/dev/dagre-d3/lib/graphlib.js":9,graphlib:57}],34:[function(require,module,exports){var 
_=require("./lodash"),Graph=require("./graphlib").Graph,List=require("./data/list");module.exports=greedyFAS;var DEFAULT_WEIGHT_FN=_.constant(1);function greedyFAS(g,weightFn){if(g.nodeCount()<=1){return[]}var state=buildState(g,weightFn||DEFAULT_WEIGHT_FN);var results=doGreedyFAS(state.graph,state.buckets,state.zeroIdx);return _.flatten(_.map(results,function(e){return g.outEdges(e.v,e.w)}),true)}function doGreedyFAS(g,buckets,zeroIdx){var results=[],sources=buckets[buckets.length-1],sinks=buckets[0];var entry;while(g.nodeCount()){while(entry=sinks.dequeue()){removeNode(g,buckets,zeroIdx,entry)}while(entry=sources.dequeue()){removeNode(g,buckets,zeroIdx,entry)}if(g.nodeCount()){for(var i=buckets.length-2;i>0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" 
removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){ +module.exports={graphlib:require("./lib/graphlib"),dagre:require("./lib/dagre"),intersect:require("./lib/intersect"),render:require("./lib/render"),util:require("./lib/util"),version:require("./lib/version")}},{"./lib/dagre":8,"./lib/graphlib":9,"./lib/intersect":10,"./lib/render":23,"./lib/util":25,"./lib/version":26}],2:[function(require,module,exports){var util=require("./util");module.exports={"default":normal,normal:normal,vee:vee,undirected:undirected};function normal(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function vee(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 0 L 10 5 L 0 10 L 4 5 z").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}function undirected(parent,id,edge,type){var marker=parent.append("marker").attr("id",id).attr("viewBox","0 0 10 10").attr("refX",9).attr("refY",5).attr("markerUnits","strokeWidth").attr("markerWidth",8).attr("markerHeight",6).attr("orient","auto");var path=marker.append("path").attr("d","M 0 5 L 10 5").style("stroke-width",1).style("stroke-dasharray","1,0");util.applyStyle(path,edge[type+"Style"])}},{"./util":25}],3:[function(require,module,exports){var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util");module.exports=createClusters;function createClusters(selection,g){var clusters=g.nodes().filter(function(v){return util.isSubgraph(g,v)}),svgClusters=selection.selectAll("g.cluster").data(clusters,function(v){return v});var makeClusterIdentifier=function(v){return"cluster_"+v.replace(/^cluster/,"")};svgClusters.enter().append("g").attr("class",makeClusterIdentifier).attr("name",function(v){return g.node(v).label}).classed("cluster",true).style("opacity",0).append("rect");var sortedClusters=util.orderByRank(g,svgClusters.data());for(var i=0;i<sortedClusters.length;i++){var v=sortedClusters[i];var node=g.node(v);if(node.label){var thisGroup=selection.select("g.cluster."+makeClusterIdentifier(v));labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),bbox=_.pick(labelDom.node().getBBox(),"width","height");node.paddingTop+=bbox.height;node.paddingTop+=util.getMaxChildPaddingTop(g,v)}}util.applyTransition(svgClusters.exit(),g).style("opacity",0).remove();util.applyTransition(svgClusters,g).style("opacity",1);util.applyTransition(svgClusters.selectAll("rect"),g).attr("width",function(v){var node=g.node(v);return node.width+node.paddingLeft+node.paddingRight}).attr("height",function(v){var node=g.node(v);return node.height+node.paddingTop+node.paddingBottom}).attr("x",function(v){var 
node=g.node(v);return node.x-node.width/2-node.paddingLeft}).attr("y",function(v){var node=g.node(v);return node.y-node.height/2-node.paddingTop});svgClusters.each(function(){var cluster=d3.select(this),label=cluster.select("g.label"),rect=cluster.select("rect"),bbox=label.node().getBBox(),labelW=bbox.width,labelH=bbox.height;var num=function(x){return parseFloat(x.toString().replace(/px$/,""))};var labelX=num(rect.attr("x"))+num(rect.attr("width"))-labelH/2+labelW/2;var labelY=num(rect.attr("y"))+labelH;label.attr("text-anchor","end").attr("transform","translate("+labelX+","+labelY+")")})}},{"./label/add-label":18,"./lodash":20,"./util":25}],4:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createEdgeLabels;function createEdgeLabels(selection,g){var svgEdgeLabels=selection.selectAll("g.edgeLabel").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);svgEdgeLabels.selectAll("*").remove();svgEdgeLabels.enter().append("g").classed("edgeLabel",true).style("opacity",0);svgEdgeLabels.each(function(e){var edge=g.edge(e),label=addLabel(d3.select(this),g.edge(e),0,0).classed("label",true),bbox=label.node().getBBox();if(edge.labelId){label.attr("id",edge.labelId)}if(!_.has(edge,"width")){edge.width=bbox.width}if(!_.has(edge,"height")){edge.height=bbox.height}});util.applyTransition(svgEdgeLabels.exit(),g).style("opacity",0).remove();return svgEdgeLabels}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],5:[function(require,module,exports){"use strict";var _=require("./lodash"),intersectNode=require("./intersect/intersect-node"),util=require("./util"),d3=require("./d3");module.exports=createEdgePaths;function createEdgePaths(selection,g,arrows){var svgPaths=selection.selectAll("g.edgePath").data(g.edges(),function(e){return util.edgeToId(e)}).classed("update",true);enter(svgPaths,g);exit(svgPaths,g);util.applyTransition(svgPaths,g).style("opacity",1);svgPaths.each(function(e){var domEdge=d3.select(this);var edge=g.edge(e);edge.elem=this;if(edge.id){domEdge.attr("id",edge.id)}util.applyClass(domEdge,edge["class"],(domEdge.classed("update")?"update ":"")+"edgePath")});svgPaths.selectAll("path.path").each(function(e){var edge=g.edge(e);edge.arrowheadId=_.uniqueId("arrowhead");var domEdge=d3.select(this).attr("marker-end",function(){return"url(#"+edge.arrowheadId+")"}).style("fill","none");util.applyTransition(domEdge,g).attr("d",function(e){return calcPoints(g,e)});util.applyStyle(domEdge,edge.style)});svgPaths.selectAll("defs *").remove();svgPaths.selectAll("defs").each(function(e){var edge=g.edge(e),arrowhead=arrows[edge.arrowhead];arrowhead(d3.select(this),edge.arrowheadId,edge,"arrowhead")});return svgPaths}function calcPoints(g,e){var edge=g.edge(e),tail=g.node(e.v),head=g.node(e.w),points=edge.points.slice(1,edge.points.length-1);points.unshift(intersectNode(tail,points[0]));points.push(intersectNode(head,points[points.length-1]));return createLine(edge,points)}function createLine(edge,points){var line=d3.svg.line().x(function(d){return d.x}).y(function(d){return d.y});if(_.has(edge,"lineInterpolate")){line.interpolate(edge.lineInterpolate)}if(_.has(edge,"lineTension")){line.tension(Number(edge.lineTension))}return line(points)}function getCoords(elem){var bbox=elem.getBBox(),matrix=elem.getTransformToElement(elem.ownerSVGElement).translate(bbox.width/2,bbox.height/2);return{x:matrix.e,y:matrix.f}}function enter(svgPaths,g){var 
svgPathsEnter=svgPaths.enter().append("g").attr("class","edgePath").style("opacity",0);svgPathsEnter.append("path").attr("class","path").attr("d",function(e){var edge=g.edge(e),sourceElem=g.node(e.v).elem,points=_.range(edge.points.length).map(function(){return getCoords(sourceElem)});return createLine(edge,points)});svgPathsEnter.append("defs")}function exit(svgPaths,g){var svgPathExit=svgPaths.exit();util.applyTransition(svgPathExit,g).style("opacity",0).remove();util.applyTransition(svgPathExit.select("path.path"),g).attr("d",function(e){var source=g.node(e.v);if(source){var points=_.range(this.pathSegList.length).map(function(){return source});return createLine({},points)}else{return d3.select(this).attr("d")}})}},{"./d3":7,"./intersect/intersect-node":14,"./lodash":20,"./util":25}],6:[function(require,module,exports){"use strict";var _=require("./lodash"),addLabel=require("./label/add-label"),util=require("./util"),d3=require("./d3");module.exports=createNodes;function createNodes(selection,g,shapes){var simpleNodes=g.nodes().filter(function(v){return!util.isSubgraph(g,v)});var svgNodes=selection.selectAll("g.node").data(simpleNodes,function(v){return v}).classed("update",true);svgNodes.selectAll("*").remove();svgNodes.enter().append("g").attr("class",function(v){return"node_"+v}).attr("name",function(v){return g.node(v).label}).classed("node",true).style("opacity",0);svgNodes.each(function(v){var node=g.node(v),thisGroup=d3.select(this),labelGroup=thisGroup.append("g").attr("class","label"),labelDom=addLabel(labelGroup,node),shape=shapes[node.shape],bbox=_.pick(labelDom.node().getBBox(),"width","height");node.elem=this;if(node.id){thisGroup.attr("id",node.id)}if(node.labelId){labelGroup.attr("id",node.labelId)}util.applyClass(thisGroup,node["class"],(thisGroup.classed("update")?"update ":"")+"node");if(_.has(node,"width")){bbox.width=node.width}if(_.has(node,"height")){bbox.height=node.height}bbox.width+=node.paddingLeft+node.paddingRight;bbox.height+=node.paddingTop+node.paddingBottom;labelGroup.attr("transform","translate("+(node.paddingLeft-node.paddingRight)/2+","+(node.paddingTop-node.paddingBottom)/2+")");var shapeSvg=shape(d3.select(this),bbox,node);util.applyStyle(shapeSvg,node.style);var requiredWidth=0,requiredHeight=0;var nextNode=g.node(g.parent(v));while(nextNode){var tempGroup=thisGroup.append("g");var tempLabel=addLabel(tempGroup,nextNode);var tempBBox=tempLabel.node().getBBox();tempBBox.width-=50;requiredWidth=Math.max(requiredWidth,tempBBox.width);requiredHeight=Math.max(requiredHeight,tempBBox.height);tempLabel.remove();nextNode=g.node(g.parent(nextNode.label))}var shapeBBox=shapeSvg.node().getBBox();shapeBBox.width=Math.max(shapeBBox.width,requiredWidth);shapeBBox.height=Math.max(shapeBBox.height,requiredHeight);node.width=shapeBBox.width;node.height=shapeBBox.height});util.applyTransition(svgNodes.exit(),g).style("opacity",0).remove();return svgNodes}},{"./d3":7,"./label/add-label":18,"./lodash":20,"./util":25}],7:[function(require,module,exports){module.exports=window.d3},{}],8:[function(require,module,exports){var dagre;if(require){try{dagre=require("dagre")}catch(e){}}if(!dagre){dagre=window.dagre}module.exports=dagre},{dagre:27}],9:[function(require,module,exports){var 
graphlib;if(require){try{graphlib=require("graphlib")}catch(e){}}if(!graphlib){graphlib=window.graphlib}module.exports=graphlib},{graphlib:57}],10:[function(require,module,exports){module.exports={node:require("./intersect-node"),circle:require("./intersect-circle"),ellipse:require("./intersect-ellipse"),polygon:require("./intersect-polygon"),rect:require("./intersect-rect")}},{"./intersect-circle":11,"./intersect-ellipse":12,"./intersect-node":14,"./intersect-polygon":15,"./intersect-rect":16}],11:[function(require,module,exports){var intersectEllipse=require("./intersect-ellipse");module.exports=intersectCircle;function intersectCircle(node,rx,point){return intersectEllipse(node,rx,rx,point)}},{"./intersect-ellipse":12}],12:[function(require,module,exports){module.exports=intersectEllipse;function intersectEllipse(node,rx,ry,point){var cx=node.x;var cy=node.y;var px=cx-point.x;var py=cy-point.y;var det=Math.sqrt(rx*rx*py*py+ry*ry*px*px);var dx=Math.abs(rx*ry*px/det);if(point.x<cx){dx=-dx}var dy=Math.abs(rx*ry*py/det);if(point.y<cy){dy=-dy}return{x:cx+dx,y:cy+dy}}},{}],13:[function(require,module,exports){module.exports=intersectLine;function intersectLine(p1,p2,q1,q2){var a1,a2,b1,b2,c1,c2;var r1,r2,r3,r4;var denom,offset,num;var x,y;a1=p2.y-p1.y;b1=p1.x-p2.x;c1=p2.x*p1.y-p1.x*p2.y;r3=a1*q1.x+b1*q1.y+c1;r4=a1*q2.x+b1*q2.y+c1;if(r3!==0&&r4!==0&&sameSign(r3,r4)){return}a2=q2.y-q1.y;b2=q1.x-q2.x;c2=q2.x*q1.y-q1.x*q2.y;r1=a2*p1.x+b2*p1.yy+c2;r2=a2*p2.x+b2*p2.y+c2;if(r1!==0&&r2!==0&&sameSign(r1,r2)){return}denom=a1*b2-a2*b1;if(denom===0){return}offset=Math.abs(denom/2);num=b1*c2-b2*c1;x=num<0?(num-offset)/denom:(num+offset)/denom;num=a2*c1-a1*c2;y=num<0?(num-offset)/denom:(num+offset)/denom;return{x:x,y:y}}function sameSign(r1,r2){return r1*r2>0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i<polyPoints.length;i++){var p1=polyPoints[i];var p2=polyPoints[i<polyPoints.length-1?i+1:0];var intersect=intersectLine(node,point,{x:left+p1.x,y:top+p1.y},{x:left+p2.x,y:top+p2.y});if(intersect){intersections.push(intersect)}}if(!intersections.length){console.log("NO INTERSECTION FOUND, RETURN NODE CENTER",node);return node}if(intersections.length>1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distp<distq?-1:distp===distq?0:1})}return intersections[0]}},{"./intersect-line":13}],16:[function(require,module,exports){module.exports=intersectRect;function intersectRect(node,point){var x=node.x;var y=node.y;var dx=point.x-x;var dy=point.y-y;var w=node.width/2;var h=node.height/2;var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var 
label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;i<lines.length;i++){domNode.append("tspan").attr("xml:space","preserve").attr("dy","1em").attr("x","1").text(lines[i])}util.applyStyle(domNode,node.labelStyle);return domNode}function processEscapeSequences(text){var newText="",escaped=false,ch;for(var i=0;i<text.length;++i){ch=text[i];if(escaped){switch(ch){case"n":newText+="\n";break;default:newText+=ch}escaped=false}else if(ch==="\\"){escaped=true}else{newText+=ch}}return newText}},{"../util":25}],20:[function(require,module,exports){var lodash;if(require){try{lodash=require("lodash")}catch(e){}}if(!lodash){lodash=window._}module.exports=lodash},{lodash:77}],21:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3"),_=require("./lodash");module.exports=positionEdgeLabels;function positionEdgeLabels(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(e){var edge=g.edge(e);return _.has(edge,"x")?"translate("+edge.x+","+edge.y+")":""}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./lodash":20,"./util":25}],22:[function(require,module,exports){"use strict";var util=require("./util"),d3=require("./d3");module.exports=positionNodes;function positionNodes(selection,g){var created=selection.filter(function(){return!d3.select(this).classed("update")});function translate(v){var node=g.node(v);return"translate("+node.x+","+node.y+")"}created.attr("transform",translate);util.applyTransition(selection,g).style("opacity",1).attr("transform",translate)}},{"./d3":7,"./util":25}],23:[function(require,module,exports){var _=require("./lodash"),layout=require("./dagre").layout;module.exports=render;function render(){var createNodes=require("./create-nodes"),createClusters=require("./create-clusters"),createEdgeLabels=require("./create-edge-labels"),createEdgePaths=require("./create-edge-paths"),positionNodes=require("./position-nodes"),positionEdgeLabels=require("./position-edge-labels"),shapes=require("./shapes"),arrows=require("./arrows");var fn=function(svg,g){preProcessGraph(g);var 
outputGroup=createOrSelectGroup(svg,"output"),clustersGroup=createOrSelectGroup(outputGroup,"clusters"),edgePathsGroup=createOrSelectGroup(outputGroup,"edgePaths"),edgeLabels=createEdgeLabels(createOrSelectGroup(outputGroup,"edgeLabels"),g),nodes=createNodes(createOrSelectGroup(outputGroup,"nodes"),g,shapes);layout(g);positionNodes(nodes,g);positionEdgeLabels(edgeLabels,g);createEdgePaths(edgePathsGroup,g,arrows);createClusters(clustersGroup,g);postProcessGraph(g)};fn.createNodes=function(value){if(!arguments.length)return createNodes;createNodes=value;return fn};fn.createClusters=function(value){if(!arguments.length)return createClusters;createClusters=value;return fn};fn.createEdgeLabels=function(value){if(!arguments.length)return createEdgeLabels;createEdgeLabels=value;return fn};fn.createEdgePaths=function(value){if(!arguments.length)return createEdgePaths;createEdgePaths=value;return fn};fn.shapes=function(value){if(!arguments.length)return shapes;shapes=value;return fn};fn.arrows=function(value){if(!arguments.length)return arrows;arrows=value;return fn};return fn}var NODE_DEFAULT_ATTRS={paddingLeft:0,paddingRight:0,paddingTop:0,paddingBottom:0,rx:0,ry:0,shape:"rect"};var EDGE_DEFAULT_ATTRS={arrowhead:"normal",lineInterpolate:"linear"};function preProcessGraph(g){g.nodes().forEach(function(v){var node=g.node(v);if(!_.has(node,"label")){node.label=v}if(_.has(node,"paddingX")){_.defaults(node,{paddingLeft:node.paddingX,paddingRight:node.paddingX})}if(_.has(node,"paddingY")){_.defaults(node,{paddingTop:node.paddingY,paddingBottom:node.paddingY})}if(_.has(node,"padding")){_.defaults(node,{paddingLeft:node.padding,paddingRight:node.padding,paddingTop:node.padding,paddingBottom:node.padding})}if(_.has(node,"paddingLeft")){_.defaults(node,{paddingLeft:node.paddingLeft})}if(_.has(node,"paddingRight")){_.defaults(node,{paddingRight:node.paddingRight})}if(_.has(node,"paddingTop")){_.defaults(node,{paddingTop:node.paddingTop})}if(_.has(node,"paddingBottom")){_.defaults(node,{paddingBottom:node.paddingBottom})}_.defaults(node,NODE_DEFAULT_ATTRS);_.each(["paddingLeft","paddingRight","paddingTop","paddingBottom"],function(k){node[k]=Number(node[k])});if(_.has(node,"width")){node._prevWidth=node.width}if(_.has(node,"height")){node._prevHeight=node.height}});g.edges().forEach(function(e){var edge=g.edge(e);if(!_.has(edge,"label")){edge.label=""}_.defaults(edge,EDGE_DEFAULT_ATTRS)})}function postProcessGraph(g){_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"_prevWidth")){node.width=node._prevWidth}else{delete node.width}if(_.has(node,"_prevHeight")){node.height=node._prevHeight}else{delete node.height}delete node._prevWidth;delete node._prevHeight})}function createOrSelectGroup(root,name){var selection=root.select("g."+name);if(selection.empty()){selection=root.append("g").attr("class",name)}return selection}},{"./arrows":2,"./create-clusters":3,"./create-edge-labels":4,"./create-edge-paths":5,"./create-nodes":6,"./dagre":8,"./lodash":20,"./position-edge-labels":21,"./position-nodes":22,"./shapes":24}],24:[function(require,module,exports){"use strict";var intersectRect=require("./intersect/intersect-rect"),intersectEllipse=require("./intersect/intersect-ellipse"),intersectCircle=require("./intersect/intersect-circle"),intersectPolygon=require("./intersect/intersect-polygon");module.exports={rect:rect,ellipse:ellipse,circle:circle,diamond:diamond};function rect(parent,bbox,node){var 
shapeSvg=parent.insert("rect",":first-child").attr("rx",node.rx).attr("ry",node.ry).attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("width",bbox.width).attr("height",bbox.height);node.intersect=function(point){return intersectRect(node,point)};return shapeSvg}function ellipse(parent,bbox,node){var rx=bbox.width/2,ry=bbox.height/2,shapeSvg=parent.insert("ellipse",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("rx",rx).attr("ry",ry);node.intersect=function(point){return intersectEllipse(node,rx,ry,point)};return shapeSvg}function circle(parent,bbox,node){var r=Math.max(bbox.width,bbox.height)/2,shapeSvg=parent.insert("circle",":first-child").attr("x",-bbox.width/2).attr("y",-bbox.height/2).attr("r",r);node.intersect=function(point){return intersectCircle(node,r,point)};return shapeSvg}function diamond(parent,bbox,node){var w=bbox.width*Math.SQRT2/2,h=bbox.height*Math.SQRT2/2,points=[{x:0,y:-h},{x:-w,y:0},{x:0,y:h},{x:w,y:0}],shapeSvg=parent.insert("polygon",":first-child").attr("points",points.map(function(p){return p.x+","+p.y}).join(" "));node.intersect=function(p){return intersectPolygon(node,points,p)};return shapeSvg}},{"./intersect/intersect-circle":11,"./intersect/intersect-ellipse":12,"./intersect/intersect-polygon":15,"./intersect/intersect-rect":16}],25:[function(require,module,exports){var _=require("./lodash");module.exports={isSubgraph:isSubgraph,getMaxChildPaddingTop:getMaxChildPaddingTop,orderByRank:orderByRank,edgeToId:edgeToId,applyStyle:applyStyle,applyClass:applyClass,applyTransition:applyTransition};function isSubgraph(g,v){return!!g.children(v).length}function getMaxChildPaddingTop(g,v){var maxPadding=0;var children=g.children(v);for(var i=0;i<children.length;i++){var child=g.node(children[i]);if(child.paddingTop&&child.paddingTop>maxPadding){maxPadding=child.paddingTop}}return maxPadding}function getRank(g,v){var maxRank=0;var children=g.children(v);for(var i=0;i<children.length;i++){var thisRank=getRank(g,children[i])+1;if(thisRank>maxRank){maxRank=thisRank}}return maxRank}function orderByRank(g,nodes){return nodes.sort(function(x,y){return getRank(g,x)-getRank(g,y)})}function edgeToId(e){return escapeId(e.v)+":"+escapeId(e.w)+":"+escapeId(e.name)}var ID_DELIM=/:/g;function escapeId(str){return str?String(str).replace(ID_DELIM,"\\:"):""}function applyStyle(dom,styleFn){if(styleFn){dom.attr("style",styleFn)}}function applyClass(dom,classFn,otherClasses){if(classFn){dom.attr("class",classFn).attr("class",otherClasses+" "+dom.attr("class"))}}function applyTransition(selection,g){var graph=g.graph();if(_.isPlainObject(graph)){var transition=graph.transition;if(_.isFunction(transition)){return transition(selection)}}return selection}},{"./lodash":20}],26:[function(require,module,exports){module.exports="0.4.4-pre"},{}],27:[function(require,module,exports){module.exports={graphlib:require("./lib/graphlib"),layout:require("./lib/layout"),debug:require("./lib/debug"),util:{time:require("./lib/util").time,notime:require("./lib/util").notime},version:require("./lib/version")}},{"./lib/debug":32,"./lib/graphlib":33,"./lib/layout":35,"./lib/util":55,"./lib/version":56}],28:[function(require,module,exports){"use strict";var _=require("./lodash"),greedyFAS=require("./greedy-fas");module.exports={run:run,undo:undo};function run(g){var fas=g.graph().acyclicer==="greedy"?greedyFAS(g,weightFn(g)):dfsFAS(g);_.each(fas,function(e){var 
label=g.edge(e);g.removeEdge(e);label.forwardName=e.name;label.reversed=true;g.setEdge(e.w,e.v,label,_.uniqueId("rev"))});function weightFn(g){return function(e){return g.edge(e).weight}}}function dfsFAS(g){var fas=[],stack={},visited={};function dfs(v){if(_.has(visited,v)){return}visited[v]=true;stack[v]=true;_.each(g.outEdges(v),function(e){if(_.has(stack,e.w)){fas.push(e)}else{dfs(e.w)}});delete stack[v]}_.each(g.nodes(),dfs);return fas}function undo(g){_.each(g.edges(),function(e){var label=g.edge(e);if(label.reversed){g.removeEdge(e);var forwardName=label.forwardName;delete label.reversed;delete label.forwardName;g.setEdge(e.w,e.v,label,forwardName)}})}},{"./greedy-fas":34,"./lodash":36}],29:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports=addBorderSegments;function addBorderSegments(g){function dfs(v){var children=g.children(v),node=g.node(v);if(children.length){_.each(children,dfs)}if(_.has(node,"minRank")){node.borderLeft=[];node.borderRight=[];for(var rank=node.minRank,maxRank=node.maxRank+1;rank<maxRank;++rank){addBorderNode(g,"borderLeft","_bl",v,node,rank);addBorderNode(g,"borderRight","_br",v,node,rank)}}}_.each(g.children(),dfs)}function addBorderNode(g,prop,prefix,sg,sgNode,rank){var label={width:0,height:0,rank:rank},prev=sgNode[prop][rank-1],curr=util.addDummyNode(g,"border",label,prefix);sgNode[prop][rank]=curr;g.setParent(curr,sg);if(prev){g.setEdge(prev,curr,{weight:1})}}},{"./lodash":36,"./util":55}],30:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports={adjust:adjust,undo:undo};function adjust(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="lr"||rankDir==="rl"){swapWidthHeight(g)}}function undo(g){var rankDir=g.graph().rankdir.toLowerCase();if(rankDir==="bt"||rankDir==="rl"){reverseY(g)}if(rankDir==="lr"||rankDir==="rl"){swapXY(g);swapWidthHeight(g)}}function swapWidthHeight(g){_.each(g.nodes(),function(v){swapWidthHeightOne(g.node(v))});_.each(g.edges(),function(e){swapWidthHeightOne(g.edge(e))})}function swapWidthHeightOne(attrs){var w=attrs.width;attrs.width=attrs.height;attrs.height=w}function reverseY(g){_.each(g.nodes(),function(v){reverseYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,reverseYOne);if(_.has(edge,"y")){reverseYOne(edge)}})}function reverseYOne(attrs){attrs.y=-attrs.y}function swapXY(g){_.each(g.nodes(),function(v){swapXYOne(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,swapXYOne);if(_.has(edge,"x")){swapXYOne(edge)}})}function swapXYOne(attrs){var x=attrs.x;attrs.x=attrs.y;attrs.y=x}},{"./lodash":36}],31:[function(require,module,exports){module.exports=List;function List(){var sentinel={};sentinel._next=sentinel._prev=sentinel;this._sentinel=sentinel}List.prototype.dequeue=function(){var sentinel=this._sentinel,entry=sentinel._prev;if(entry!==sentinel){unlink(entry);return entry}};List.prototype.enqueue=function(entry){var sentinel=this._sentinel;if(entry._prev&&entry._next){unlink(entry)}entry._next=sentinel._next;sentinel._next._prev=entry;sentinel._next=entry;entry._prev=sentinel};List.prototype.toString=function(){var strs=[],sentinel=this._sentinel,curr=sentinel._prev;while(curr!==sentinel){strs.push(JSON.stringify(curr,filterOutLinks));curr=curr._prev}return"["+strs.join(", ")+"]"};function unlink(entry){entry._prev._next=entry._next;entry._next._prev=entry._prev;delete entry._next;delete entry._prev}function filterOutLinks(k,v){if(k!=="_next"&&k!=="_prev"){return 
v}}},{}],32:[function(require,module,exports){var _=require("./lodash"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports={debugOrdering:debugOrdering};function debugOrdering(g){var layerMatrix=util.buildLayerMatrix(g);var h=new Graph({compound:true,multigraph:true}).setGraph({});_.each(g.nodes(),function(v){h.setNode(v,{label:v});h.setParent(v,"layer"+g.node(v).rank)});_.each(g.edges(),function(e){h.setEdge(e.v,e.w,{},e.name)});_.each(layerMatrix,function(layer,i){var layerV="layer"+i;h.setNode(layerV,{rank:"same"});_.reduce(layer,function(u,v){h.setEdge(u,v,{style:"invis"});return v})});return h}},{"./graphlib":33,"./lodash":36,"./util":55}],33:[function(require,module,exports){module.exports=require(9)},{"/Users/andrew/Documents/dev/dagre-d3/lib/graphlib.js":9,graphlib:57}],34:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graphlib").Graph,List=require("./data/list");module.exports=greedyFAS;var DEFAULT_WEIGHT_FN=_.constant(1);function greedyFAS(g,weightFn){if(g.nodeCount()<=1){return[]}var state=buildState(g,weightFn||DEFAULT_WEIGHT_FN);var results=doGreedyFAS(state.graph,state.buckets,state.zeroIdx);return _.flatten(_.map(results,function(e){return g.outEdges(e.v,e.w)}),true)}function doGreedyFAS(g,buckets,zeroIdx){var results=[],sources=buckets[buckets.length-1],sinks=buckets[0];var entry;while(g.nodeCount()){while(entry=sinks.dequeue()){removeNode(g,buckets,zeroIdx,entry)}while(entry=sources.dequeue()){removeNode(g,buckets,zeroIdx,entry)}if(g.nodeCount()){for(var i=buckets.length-2;i>0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var 
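/* Module 34 above is dagre's greedy feedback-arc-set heuristic, used when
 * graph().acyclicer === "greedy": nodes sit in buckets keyed by weighted
 * out-degree minus in-degree, sinks and sources are peeled off first, and the
 * edges it returns are the ones acyclic.run reverses to make the graph a DAG. */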
_=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){ parentDummyChains(g)});time(" addBorderSegments",function(){addBorderSegments(g)});time(" order",function(){order(g)});time(" insertSelfEdges",function(){insertSelfEdges(g)});time(" adjustCoordinateSystem",function(){coordinateSystem.adjust(g)});time(" position",function(){position(g)});time(" positionSelfEdges",function(){positionSelfEdges(g)});time(" removeBorderNodes",function(){removeBorderNodes(g)});time(" normalize.undo",function(){normalize.undo(g)});time(" fixupEdgeLabelCoords",function(){fixupEdgeLabelCoords(g)});time(" undoCoordinateSystem",function(){coordinateSystem.undo(g)});time(" translateGraph",function(){translateGraph(g)});time(" assignNodeIntersects",function(){assignNodeIntersects(g)});time(" reversePoints",function(){reversePointsForReversedEdges(g)});time(" acyclic.undo",function(){acyclic.undo(g)})}function updateInputGraph(inputGraph,layoutGraph){_.each(inputGraph.nodes(),function(v){var inputLabel=inputGraph.node(v),layoutLabel=layoutGraph.node(v);if(inputLabel){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y;if(layoutGraph.children(v).length){inputLabel.width=layoutLabel.width;inputLabel.height=layoutLabel.height}}});_.each(inputGraph.edges(),function(e){var inputLabel=inputGraph.edge(e),layoutLabel=layoutGraph.edge(e);inputLabel.points=layoutLabel.points;if(_.has(layoutLabel,"x")){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y}});inputGraph.graph().width=layoutGraph.graph().width;inputGraph.graph().height=layoutGraph.graph().height}var graphNumAttrs=["nodesep","edgesep","ranksep","marginx","marginy"],graphDefaults={ranksep:50,edgesep:20,nodesep:50,rankdir:"tb"},graphAttrs=["acyclicer","ranker","rankdir","align"],nodeNumAttrs=["width","height"],nodeDefaults={width:0,height:0},edgeNumAttrs=["minlen","weight","width","height","labeloffset"],edgeDefaults={minlen:1,weight:1,width:0,height:0,labeloffset:10,labelpos:"r"},edgeAttrs=["labelpos"];function 
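/* runLayout above lists the whole dagre pipeline in execution order: make the
 * graph acyclic, rank nodes into layers, split long edges into chains of
 * single-rank dummy nodes, order nodes within each layer to reduce crossings,
 * assign x/y coordinates, then undo the normalization, coordinate-system and
 * acyclicity transforms so the results map back onto the caller's graph. */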
buildLayoutGraph(inputGraph){var g=new Graph({multigraph:true,compound:true}),graph=canonicalize(inputGraph.graph());g.setGraph(_.merge({},graphDefaults,selectNumberAttrs(graph,graphNumAttrs),_.pick(graph,graphAttrs)));_.each(inputGraph.nodes(),function(v){var node=canonicalize(inputGraph.node(v));g.setNode(v,_.defaults(selectNumberAttrs(node,nodeNumAttrs),nodeDefaults));g.setParent(v,inputGraph.parent(v))});_.each(inputGraph.edges(),function(e){var edge=canonicalize(inputGraph.edge(e));g.setEdge(e,_.merge({},edgeDefaults,selectNumberAttrs(edge,edgeNumAttrs),_.pick(edge,edgeAttrs)))});return g}function makeSpaceForEdgeLabels(g){var graph=g.graph();graph.ranksep/=2;_.each(g.edges(),function(e){var edge=g.edge(e);edge.minlen*=2;if(edge.labelpos.toLowerCase()!=="c"){if(graph.rankdir==="TB"||graph.rankdir==="BT"){edge.width+=edge.labeloffset}else{edge.height+=edge.labeloffset}}})}function injectEdgeLabelProxies(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.width&&edge.height){var v=g.node(e.v),w=g.node(e.w),label={rank:(w.rank-v.rank)/2+v.rank,e:e};util.addDummyNode(g,"edge-proxy",label,"_ep")}})}function assignRankMinMax(g){var maxRank=0;_.each(g.nodes(),function(v){var node=g.node(v);if(node.borderTop){node.minRank=g.node(node.borderTop).rank;node.maxRank=g.node(node.borderBottom).rank;maxRank=_.max(maxRank,node.maxRank)}});g.graph().maxRank=maxRank}function removeEdgeLabelProxies(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="edge-proxy"){g.edge(node.e).labelRank=node.rank;g.removeNode(v)}})}function translateGraph(g){var minX=Number.POSITIVE_INFINITY,maxX=0,minY=Number.POSITIVE_INFINITY,maxY=0,graphLabel=g.graph(),marginX=graphLabel.marginx||0,marginY=graphLabel.marginy||0;function getExtremes(attrs){var x=attrs.x,y=attrs.y,w=attrs.width,h=attrs.height;minX=Math.min(minX,x-w/2);maxX=Math.max(maxX,x+w/2);minY=Math.min(minY,y-h/2);maxY=Math.max(maxY,y+h/2)}_.each(g.nodes(),function(v){getExtremes(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){getExtremes(edge)}});minX-=marginX;minY-=marginY;_.each(g.nodes(),function(v){var node=g.node(v);node.x-=minX;node.y-=minY});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,function(p){p.x-=minX;p.y-=minY});if(_.has(edge,"x")){edge.x-=minX}if(_.has(edge,"y")){edge.y-=minY}});graphLabel.width=maxX-minX+marginX;graphLabel.height=maxY-minY+marginY}function assignNodeIntersects(g){_.each(g.edges(),function(e){var edge=g.edge(e),nodeV=g.node(e.v),nodeW=g.node(e.w),p1,p2;if(!edge.points){edge.points=[];p1=nodeW;p2=nodeV}else{p1=edge.points[0];p2=edge.points[edge.points.length-1]}edge.points.unshift(util.intersectRect(nodeV,p1));edge.points.push(util.intersectRect(nodeW,p2))})}function fixupEdgeLabelCoords(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){if(edge.labelpos==="l"||edge.labelpos==="r"){edge.width-=edge.labeloffset}switch(edge.labelpos){case"l":edge.x-=edge.width/2+edge.labeloffset;break;case"r":edge.x+=edge.width/2+edge.labeloffset;break}}})}function reversePointsForReversedEdges(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.reversed){edge.points.reverse()}})}function removeBorderNodes(g){_.each(g.nodes(),function(v){if(g.children(v).length){var 
node=g.node(v),t=g.node(node.borderTop),b=g.node(node.borderBottom),l=g.node(_.last(node.borderLeft)),r=g.node(_.last(node.borderRight));node.width=Math.abs(r.x-l.x);node.height=Math.abs(b.y-t.y);node.x=l.x+node.width/2;node.y=t.y+node.height/2}});_.each(g.nodes(),function(v){if(g.node(v).dummy==="border"){g.removeNode(v)}})}function removeSelfEdges(g){_.each(g.edges(),function(e){if(e.v===e.w){var node=g.node(e.v);if(!node.selfEdges){node.selfEdges=[]}node.selfEdges.push({e:e,label:g.edge(e)});g.removeEdge(e)}})}function insertSelfEdges(g){var layers=util.buildLayerMatrix(g);_.each(layers,function(layer){var orderShift=0;_.each(layer,function(v,i){var node=g.node(v);node.order=i+orderShift;_.each(node.selfEdges,function(selfEdge){util.addDummyNode(g,"selfedge",{width:selfEdge.label.width,height:selfEdge.label.height,rank:node.rank,order:i+ ++orderShift,e:selfEdge.e,label:selfEdge.label},"_se")});delete node.selfEdges})})}function positionSelfEdges(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="selfedge"){var selfNode=g.node(node.e.v),x=selfNode.x+selfNode.width/2,y=selfNode.y,dx=node.x-x,dy=selfNode.height/2;g.setEdge(node.e,node.label);g.removeNode(v);node.label.points=[{x:x+2*dx/3,y:y-dy},{x:x+5*dx/6,y:y-dy},{x:x+dx,y:y},{x:x+5*dx/6,y:y+dy},{x:x+2*dx/3,y:y+dy}];node.label.x=node.x;node.label.y=node.y}})}function selectNumberAttrs(obj,attrs){return _.mapValues(_.pick(obj,attrs),Number)}function canonicalize(attrs){var newAttrs={};_.each(attrs,function(v,k){newAttrs[k.toLowerCase()]=v});return newAttrs}},{"./acyclic":28,"./add-border-segments":29,"./coordinate-system":30,"./graphlib":33,"./lodash":36,"./nesting-graph":37,"./normalize":38,"./order":43,"./parent-dummy-chains":48,"./position":50,"./rank":52,"./util":55}],36:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],37:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports={run:run,cleanup:cleanup};function run(g){var root=util.addDummyNode(g,"root",{},"_root"),depths=treeDepths(g),height=_.max(depths)-1,nodeSep=2*height+1;g.graph().nestingRoot=root;_.each(g.edges(),function(e){g.edge(e).minlen*=nodeSep});var weight=sumWeights(g)+1;_.each(g.children(),function(child){dfs(g,root,nodeSep,weight,height,depths,child)});g.graph().nodeRankFactor=nodeSep}function dfs(g,root,nodeSep,weight,height,depths,v){var children=g.children(v);if(!children.length){if(v!==root){g.setEdge(root,v,{weight:0,minlen:nodeSep})}return}var top=util.addBorderNode(g,"_bt"),bottom=util.addBorderNode(g,"_bb"),label=g.node(v);g.setParent(top,v);label.borderTop=top;g.setParent(bottom,v);label.borderBottom=bottom;_.each(children,function(child){dfs(g,root,nodeSep,weight,height,depths,child);var childNode=g.node(child),childTop=childNode.borderTop?childNode.borderTop:child,childBottom=childNode.borderBottom?childNode.borderBottom:child,thisWeight=childNode.borderTop?weight:2*weight,minlen=childTop!==childBottom?1:height-depths[v]+1;g.setEdge(top,childTop,{weight:thisWeight,minlen:minlen,nestingEdge:true});g.setEdge(childBottom,bottom,{weight:thisWeight,minlen:minlen,nestingEdge:true})});if(!g.parent(v)){g.setEdge(root,top,{weight:0,minlen:height+depths[v]})}}function treeDepths(g){var depths={};function dfs(v,depth){var children=g.children(v);if(children&&children.length){_.each(children,function(child){dfs(child,depth+1)})}depths[v]=depth}_.each(g.children(),function(v){dfs(v,1)});return depths}function sumWeights(g){return 
_.reduce(g.edges(),function(acc,e){return acc+g.edge(e).weight},0)}function cleanup(g){var graphLabel=g.graph();g.removeNode(graphLabel.nestingRoot);delete graphLabel.nestingRoot;_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.nestingEdge){g.removeEdge(e)}})}},{"./lodash":36,"./util":55}],38:[function(require,module,exports){"use strict";var _=require("./lodash"),util=require("./util");module.exports={run:run,undo:undo};function run(g){g.graph().dummyChains=[];_.each(g.edges(),function(edge){normalizeEdge(g,edge)})}function normalizeEdge(g,e){var v=e.v,vRank=g.node(v).rank,w=e.w,wRank=g.node(w).rank,name=e.name,edgeLabel=g.edge(e),labelRank=edgeLabel.labelRank;if(wRank===vRank+1)return;g.removeEdge(e);var dummy,attrs,i;for(i=0,++vRank;vRank<wRank;++i,++vRank){edgeLabel.points=[];attrs={width:0,height:0,edgeLabel:edgeLabel,edgeObj:e,rank:vRank};dummy=util.addDummyNode(g,"edge",attrs,"_d");if(vRank===labelRank){attrs.width=edgeLabel.width;attrs.height=edgeLabel.height;attrs.dummy="edge-label";attrs.labelpos=edgeLabel.labelpos}g.setEdge(v,dummy,{weight:edgeLabel.weight},name);if(i===0){g.graph().dummyChains.push(dummy)}v=dummy}g.setEdge(v,w,{weight:edgeLabel.weight},name)}function undo(g){_.each(g.graph().dummyChains,function(v){var node=g.node(v),origLabel=node.edgeLabel,w;g.setEdge(node.edgeObj,origLabel);while(node.dummy){w=g.successors(v)[0];g.removeNode(v);origLabel.points.push({x:node.x,y:node.y});if(node.dummy==="edge-label"){origLabel.x=node.x;origLabel.y=node.y;origLabel.width=node.width;origLabel.height=node.height}v=w;node=g.node(v)}})}},{"./lodash":36,"./util":55}],39:[function(require,module,exports){var _=require("../lodash");module.exports=addSubgraphConstraints;function addSubgraphConstraints(g,cg,vs){var prev={},rootPrev;_.each(vs,function(v){var child=g.parent(v),parent,prevChild;while(child){parent=g.parent(child);if(parent){prevChild=prev[parent];prev[parent]=child}else{prevChild=rootPrev;rootPrev=child}if(prevChild&&prevChild!==child){cg.setEdge(prevChild,child);return}child=parent}})}},{"../lodash":36}],40:[function(require,module,exports){var _=require("../lodash");module.exports=barycenter;function barycenter(g,movable){return _.map(movable,function(v){var inV=g.inEdges(v);if(!inV.length){return{v:v}}else{var result=_.reduce(inV,function(acc,e){var edge=g.edge(e),nodeU=g.node(e.v);return{sum:acc.sum+edge.weight*nodeU.order,weight:acc.weight+edge.weight}},{sum:0,weight:0});return{v:v,barycenter:result.sum/result.weight,weight:result.weight}}})}},{"../lodash":36}],41:[function(require,module,exports){var _=require("../lodash"),Graph=require("../graphlib").Graph;module.exports=buildLayerGraph;function buildLayerGraph(g,rank,relationship){var root=createRootNode(g),result=new Graph({compound:true}).setGraph({root:root}).setDefaultNodeLabel(function(v){return g.node(v)});_.each(g.nodes(),function(v){var node=g.node(v),parent=g.parent(v);if(node.rank===rank||node.minRank<=rank&&rank<=node.maxRank){result.setNode(v);result.setParent(v,parent||root);_.each(g[relationship](v),function(e){var u=e.v===v?e.w:e.v,edge=result.edge(u,v),weight=!_.isUndefined(edge)?edge.weight:0;result.setEdge(u,v,{weight:g.edge(e).weight+weight})});if(_.has(node,"minRank")){result.setNode(v,{borderLeft:node.borderLeft[rank],borderRight:node.borderRight[rank]})}}});return result}function createRootNode(g){var v;while(g.hasNode(v=_.uniqueId("_root")));return v}},{"../graphlib":33,"../lodash":36}],42:[function(require,module,exports){"use strict";var 
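/* Modules 38-41 above: "normalize" rewrites every edge spanning more than one
 * rank as a chain of unit-length edges through dummy nodes (undone later, when
 * the dummy coordinates become the edge's bend points); "barycenter" computes,
 * per movable node, the weighted mean order of its in-edge neighbors, which is
 * the value the ordering phase sorts on. */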
_=require("../lodash");module.exports=crossCount;function crossCount(g,layering){var cc=0;for(var i=1;i<layering.length;++i){cc+=twoLayerCrossCount(g,layering[i-1],layering[i])}return cc}function twoLayerCrossCount(g,northLayer,southLayer){var southPos=_.zipObject(southLayer,_.map(southLayer,function(v,i){return i}));var southEntries=_.flatten(_.map(northLayer,function(v){return _.chain(g.outEdges(v)).map(function(e){return{pos:southPos[e.w],weight:g.edge(e).weight}}).sortBy("pos").value()}),true);var firstIndex=1;while(firstIndex<southLayer.length)firstIndex<<=1;var treeSize=2*firstIndex-1;firstIndex-=1;var tree=_.map(new Array(treeSize),function(){return 0});var cc=0;_.each(southEntries.forEach(function(entry){var index=entry.pos+firstIndex;tree[index]+=entry.weight;var weightSum=0;while(index>0){if(index%2){weightSum+=tree[index+1]}index=index-1>>1;tree[index]+=entry.weight}cc+=entry.weight*weightSum}));return cc}},{"../lodash":36}],43:[function(require,module,exports){"use strict";var _=require("../lodash"),initOrder=require("./init-order"),crossCount=require("./cross-count"),sortSubgraph=require("./sort-subgraph"),buildLayerGraph=require("./build-layer-graph"),addSubgraphConstraints=require("./add-subgraph-constraints"),Graph=require("../graphlib").Graph,util=require("../util");module.exports=order;function order(g){var maxRank=util.maxRank(g),downLayerGraphs=buildLayerGraphs(g,_.range(1,maxRank+1),"inEdges"),upLayerGraphs=buildLayerGraphs(g,_.range(maxRank-1,-1,-1),"outEdges");var layering=initOrder(g);assignOrder(g,layering);var bestCC=Number.POSITIVE_INFINITY,best;for(var i=0,lastBest=0;lastBest<4;++i,++lastBest){sweepLayerGraphs(i%2?downLayerGraphs:upLayerGraphs,i%4>=2);layering=util.buildLayerMatrix(g);var cc=crossCount(g,layering);if(cc<bestCC){lastBest=0;best=_.cloneDeep(layering);bestCC=cc}}assignOrder(g,best)}function buildLayerGraphs(g,ranks,relationship){return _.map(ranks,function(rank){return buildLayerGraph(g,rank,relationship)})}function sweepLayerGraphs(layerGraphs,biasRight){var cg=new Graph;_.each(layerGraphs,function(lg){var root=lg.graph().root;var sorted=sortSubgraph(lg,root,cg,biasRight);_.each(sorted.vs,function(v,i){lg.node(v).order=i});addSubgraphConstraints(lg,cg,sorted.vs)})}function assignOrder(g,layering){_.each(layering,function(layer){_.each(layer,function(v,i){g.node(v).order=i})})}},{"../graphlib":33,"../lodash":36,"../util":55,"./add-subgraph-constraints":39,"./build-layer-graph":41,"./cross-count":42,"./init-order":44,"./sort-subgraph":46}],44:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=initOrder;function initOrder(g){var visited={},simpleNodes=_.filter(g.nodes(),function(v){return!g.children(v).length}),maxRank=_.max(_.map(simpleNodes,function(v){return g.node(v).rank})),layers=_.map(_.range(maxRank+1),function(){return[]});function dfs(v){if(_.has(visited,v))return;visited[v]=true;var node=g.node(v);layers[node.rank].push(v);_.each(g.successors(v),dfs)}var orderedVs=_.sortBy(simpleNodes,function(v){return g.node(v).rank});_.each(orderedVs,dfs);return layers}},{"../lodash":36}],45:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports=resolveConflicts;function resolveConflicts(entries,cg){var mappedEntries={};_.each(entries,function(entry,i){var tmp=mappedEntries[entry.v]={indegree:0,"in":[],out:[],vs:[entry.v],i:i};if(!_.isUndefined(entry.barycenter)){tmp.barycenter=entry.barycenter;tmp.weight=entry.weight}});_.each(cg.edges(),function(e){var 
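/* crossCount above counts weighted edge crossings between two adjacent layers
 * with an accumulator tree; "order" (module 43) runs alternating up/down
 * barycenter sweeps and keeps the layering with the fewest crossings, stopping
 * after four consecutive sweeps without improvement. */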
entryV=mappedEntries[e.v],entryW=mappedEntries[e.w];if(!_.isUndefined(entryV)&&!_.isUndefined(entryW)){entryW.indegree++;entryV.out.push(mappedEntries[e.w])}});var sourceSet=_.filter(mappedEntries,function(entry){return!entry.indegree});return doResolveConflicts(sourceSet)}function doResolveConflicts(sourceSet){var entries=[];function handleIn(vEntry){return function(uEntry){if(uEntry.merged){return}if(_.isUndefined(uEntry.barycenter)||_.isUndefined(vEntry.barycenter)||uEntry.barycenter>=vEntry.barycenter){mergeEntries(vEntry,uEntry)}}}function handleOut(vEntry){return function(wEntry){wEntry["in"].push(vEntry);if(--wEntry.indegree===0){sourceSet.push(wEntry)}}}while(sourceSet.length){var entry=sourceSet.pop();entries.push(entry);_.each(entry["in"].reverse(),handleIn(entry));_.each(entry.out,handleOut(entry))}return _.chain(entries).filter(function(entry){return!entry.merged}).map(function(entry){return _.pick(entry,["vs","i","barycenter","weight"])}).value()}function mergeEntries(target,source){var sum=0,weight=0;if(target.weight){sum+=target.barycenter*target.weight;weight+=target.weight}if(source.weight){sum+=source.barycenter*source.weight;weight+=source.weight}target.vs=source.vs.concat(target.vs);target.barycenter=sum/weight;target.weight=weight;target.i=Math.min(source.i,target.i);source.merged=true}},{"../lodash":36}],46:[function(require,module,exports){var _=require("../lodash"),barycenter=require("./barycenter"),resolveConflicts=require("./resolve-conflicts"),sort=require("./sort");module.exports=sortSubgraph;function sortSubgraph(g,v,cg,biasRight){var movable=g.children(v),node=g.node(v),bl=node?node.borderLeft:undefined,br=node?node.borderRight:undefined,subgraphs={};if(bl){movable=_.filter(movable,function(w){return w!==bl&&w!==br})}var barycenters=barycenter(g,movable);_.each(barycenters,function(entry){if(g.children(entry.v).length){var subgraphResult=sortSubgraph(g,entry.v,cg,biasRight);subgraphs[entry.v]=subgraphResult;if(_.has(subgraphResult,"barycenter")){mergeBarycenters(entry,subgraphResult)}}});var entries=resolveConflicts(barycenters,cg);expandSubgraphs(entries,subgraphs);var result=sort(entries,biasRight);if(bl){result.vs=_.flatten([bl,result.vs,br],true);if(g.predecessors(bl).length){var blPred=g.node(g.predecessors(bl)[0]),brPred=g.node(g.predecessors(br)[0]);if(!_.has(result,"barycenter")){result.barycenter=0;result.weight=0}result.barycenter=(result.barycenter*result.weight+blPred.order+brPred.order)/(result.weight+2);result.weight+=2}}return result}function expandSubgraphs(entries,subgraphs){_.each(entries,function(entry){entry.vs=_.flatten(entry.vs.map(function(v){if(subgraphs[v]){return subgraphs[v].vs}return v}),true)})}function mergeBarycenters(target,other){if(!_.isUndefined(target.barycenter)){target.barycenter=(target.barycenter*target.weight+other.barycenter*other.weight)/(target.weight+other.weight);target.weight+=other.weight}else{target.barycenter=other.barycenter;target.weight=other.weight}}},{"../lodash":36,"./barycenter":40,"./resolve-conflicts":45,"./sort":47}],47:[function(require,module,exports){var _=require("../lodash"),util=require("../util");module.exports=sort;function sort(entries,biasRight){var parts=util.partition(entries,function(entry){return _.has(entry,"barycenter")});var 
sortable=parts.lhs,unsortable=_.sortBy(parts.rhs,function(entry){return-entry.i}),vs=[],sum=0,weight=0,vsIndex=0;sortable.sort(compareWithBias(!!biasRight));vsIndex=consumeUnsortable(vs,unsortable,vsIndex);_.each(sortable,function(entry){vsIndex+=entry.vs.length;vs.push(entry.vs);sum+=entry.barycenter*entry.weight;weight+=entry.weight;vsIndex=consumeUnsortable(vs,unsortable,vsIndex)});var result={vs:_.flatten(vs,true)};if(weight){result.barycenter=sum/weight;result.weight=weight}return result}function consumeUnsortable(vs,unsortable,index){var last;while(unsortable.length&&(last=_.last(unsortable)).i<=index){unsortable.pop();vs.push(last.vs);index++}return index}function compareWithBias(bias){return function(entryV,entryW){if(entryV.barycenter<entryW.barycenter){return-1}else if(entryV.barycenter>entryW.barycenter){return 1}return!bias?entryV.i-entryW.i:entryW.i-entryV.i}}},{"../lodash":36,"../util":55}],48:[function(require,module,exports){var _=require("./lodash");module.exports=parentDummyChains;function parentDummyChains(g){var postorderNums=postorder(g);_.each(g.graph().dummyChains,function(v){var node=g.node(v),edgeObj=node.edgeObj,pathData=findPath(g,postorderNums,edgeObj.v,edgeObj.w),path=pathData.path,lca=pathData.lca,pathIdx=0,pathV=path[pathIdx],ascending=true;while(v!==edgeObj.w){node=g.node(v);if(ascending){while((pathV=path[pathIdx])!==lca&&g.node(pathV).maxRank<node.rank){pathIdx++}if(pathV===lca){ascending=false}}if(!ascending){while(pathIdx<path.length-1&&g.node(pathV=path[pathIdx+1]).minRank<=node.rank){pathIdx++}pathV=path[pathIdx]}g.setParent(v,pathV);v=g.successors(v)[0]}})}function findPath(g,postorderNums,v,w){var vPath=[],wPath=[],low=Math.min(postorderNums[v].low,postorderNums[w].low),lim=Math.max(postorderNums[v].lim,postorderNums[w].lim),parent,lca;parent=v;do{parent=g.parent(parent);vPath.push(parent)}while(parent&&(postorderNums[parent].low>low||lim>postorderNums[parent].lim));lca=parent;parent=w;while((parent=g.parent(parent))!==lca){wPath.push(parent)}return{path:vPath.concat(wPath.reverse()),lca:lca}}function postorder(g){var result={},lim=0;function dfs(v){var low=lim;_.each(g.children(v),dfs);result[v]={low:low,lim:lim++}}_.each(g.children(),dfs);return result}},{"./lodash":36}],49:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,util=require("../util");module.exports={positionX:positionX,findType1Conflicts:findType1Conflicts,findType2Conflicts:findType2Conflicts,addConflict:addConflict,hasConflict:hasConflict,verticalAlignment:verticalAlignment,horizontalCompaction:horizontalCompaction,alignCoordinates:alignCoordinates,findSmallestWidthAlignment:findSmallestWidthAlignment,balance:balance};function findType1Conflicts(g,layering){var conflicts={};function visitLayer(prevLayer,layer){var k0=0,scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.each(layer,function(v,i){var w=findOtherInnerSegmentNode(g,v),k1=w?g.node(w).order:prevLayerLength;if(w||v===lastNode){_.each(layer.slice(scanPos,i+1),function(scanNode){_.each(g.predecessors(scanNode),function(u){var uLabel=g.node(u),uPos=uLabel.order;if((uPos<k0||k1<uPos)&&!(uLabel.dummy&&g.node(scanNode).dummy)){addConflict(conflicts,u,scanNode)}})});scanPos=i+1;k0=k1}});return layer}_.reduce(layering,visitLayer);return conflicts}function findType2Conflicts(g,layering){var conflicts={};function scan(south,southPos,southEnd,prevNorthBorder,nextNorthBorder){var 
v;_.each(_.range(southPos,southEnd),function(i){v=south[i];if(g.node(v).dummy){_.each(g.predecessors(v),function(u){var uNode=g.node(u);if(uNode.dummy&&(uNode.order<prevNorthBorder||uNode.order>nextNorthBorder)){addConflict(conflicts,u,v)}})}})}function visitLayer(north,south){var prevNorthPos=-1,nextNorthPos,southPos=0;_.each(south,function(v,southLookahead){if(g.node(v).dummy==="border"){var predecessors=g.predecessors(v);if(predecessors.length){nextNorthPos=g.node(predecessors[0]).order;scan(south,southPos,southLookahead,prevNorthPos,nextNorthPos);southPos=southLookahead;prevNorthPos=nextNorthPos}}scan(south,southPos,south.length,nextNorthPos,north.length)});return south}_.reduce(layering,visitLayer);return conflicts}function findOtherInnerSegmentNode(g,v){if(g.node(v).dummy){return _.find(g.predecessors(v),function(u){return g.node(u).dummy})}}function addConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}var conflictsV=conflicts[v];if(!conflictsV){conflicts[v]=conflictsV={}}conflictsV[w]=true}function hasConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}return _.has(conflicts[v],w)}function verticalAlignment(g,layering,conflicts,neighborFn){var root={},align={},pos={};_.each(layering,function(layer){_.each(layer,function(v,order){root[v]=v;align[v]=v;pos[v]=order})});_.each(layering,function(layer){var prevIdx=-1;_.each(layer,function(v){var ws=neighborFn(v);if(ws.length){ws=_.sortBy(ws,function(w){return pos[w]});var mp=(ws.length-1)/2;for(var i=Math.floor(mp),il=Math.ceil(mp);i<=il;++i){var w=ws[i];if(align[v]===v&&prevIdx<pos[w]&&!hasConflict(conflicts,v,w)){align[w]=v;align[v]=root[v]=root[w];prevIdx=pos[w]}}}})});return{root:root,align:align}}function horizontalCompaction(g,layering,root,align,reverseSep){var xs={},blockG=buildBlockGraph(g,layering,root,reverseSep);var visited={};function pass1(v){if(!_.has(visited,v)){visited[v]=true;xs[v]=_.reduce(blockG.inEdges(v),function(max,e){pass1(e.v);return Math.max(max,xs[e.v]+blockG.edge(e))},0)}}_.each(blockG.nodes(),pass1);function pass2(v){if(visited[v]!==2){visited[v]++;var min=_.reduce(blockG.outEdges(v),function(min,e){pass2(e.w);return Math.min(min,xs[e.w]-blockG.edge(e))},Number.POSITIVE_INFINITY);if(min!==Number.POSITIVE_INFINITY){xs[v]=Math.max(xs[v],min)}}}_.each(blockG.nodes(),pass2);_.each(align,function(v){xs[v]=xs[root[v]]});return xs}function buildBlockGraph(g,layering,root,reverseSep){var blockGraph=new Graph,graphLabel=g.graph(),sepFn=sep(graphLabel.nodesep,graphLabel.edgesep,reverseSep);_.each(layering,function(layer){var u;_.each(layer,function(v){var vRoot=root[v];blockGraph.setNode(vRoot);if(u){var uRoot=root[u],prevMax=blockGraph.edge(uRoot,vRoot);blockGraph.setEdge(uRoot,vRoot,Math.max(sepFn(g,v,u),prevMax||0))}u=v})});return blockGraph}function findSmallestWidthAlignment(g,xss){return _.min(xss,function(xs){var min=_.min(xs,function(x,v){return x-width(g,v)/2}),max=_.max(xs,function(x,v){return x+width(g,v)/2});return max-min})}function alignCoordinates(xss,alignTo){var alignToMin=_.min(alignTo),alignToMax=_.max(alignTo);_.each(["u","d"],function(vert){_.each(["l","r"],function(horiz){var alignment=vert+horiz,xs=xss[alignment],delta;if(xs===alignTo)return;delta=horiz==="l"?alignToMin-_.min(xs):alignToMax-_.max(xs);if(delta){xss[alignment]=_.mapValues(xs,function(x){return x+delta})}})})}function balance(xss,align){return _.mapValues(xss.ul,function(ignore,v){if(align){return xss[align.toLowerCase()][v]}else{var xs=_.sortBy(_.pluck(xss,v));return(xs[1]+xs[2])/2}})}function positionX(g){var 
layering=util.buildLayerMatrix(g),conflicts=_.merge(findType1Conflicts(g,layering),findType2Conflicts(g,layering));var xss={},adjustedLayering;_.each(["u","d"],function(vert){adjustedLayering=vert==="u"?layering:_.values(layering).reverse();_.each(["l","r"],function(horiz){if(horiz==="r"){adjustedLayering=_.map(adjustedLayering,function(inner){return _.values(inner).reverse()})}var neighborFn=_.bind(vert==="u"?g.predecessors:g.successors,g);var align=verticalAlignment(g,adjustedLayering,conflicts,neighborFn);var xs=horizontalCompaction(g,adjustedLayering,align.root,align.align,horiz==="r");if(horiz==="r"){xs=_.mapValues(xs,function(x){return-x})}xss[vert+horiz]=xs})});var smallestWidth=findSmallestWidthAlignment(g,xss);alignCoordinates(xss,smallestWidth);return balance(xss,g.graph().align)}function sep(nodeSep,edgeSep,reverseSep){return function(g,v,w){var vLabel=g.node(v),wLabel=g.node(w),sum=0,delta;sum+=vLabel.width/2;if(_.has(vLabel,"labelpos")){switch(vLabel.labelpos.toLowerCase()){case"l":delta=-vLabel.width/2;break;case"r":delta=vLabel.width/2;break}}if(delta){sum+=reverseSep?delta:-delta}delta=0;sum+=(vLabel.dummy?edgeSep:nodeSep)/2;sum+=(wLabel.dummy?edgeSep:nodeSep)/2;sum+=wLabel.width/2;if(_.has(wLabel,"labelpos")){switch(wLabel.labelpos.toLowerCase()){case"l":delta=wLabel.width/2;break;case"r":delta=-wLabel.width/2;break}}if(delta){sum+=reverseSep?delta:-delta}delta=0;return sum}}function width(g,v){return g.node(v).width}},{"../graphlib":33,"../lodash":36,"../util":55}],50:[function(require,module,exports){"use strict";var _=require("../lodash"),util=require("../util"),positionX=require("./bk").positionX;module.exports=position;function position(g){g=util.asNonCompoundGraph(g);positionY(g);_.each(positionX(g),function(x,v){g.node(v).x=x})}function positionY(g){var layering=util.buildLayerMatrix(g),rankSep=g.graph().ranksep,prevY=0;_.each(layering,function(layer){var maxHeight=_.max(_.map(layer,function(v){return g.node(v).height}));_.each(layer,function(v){g.node(v).y=prevY+maxHeight/2});prevY+=maxHeight+rankSep})}},{"../lodash":36,"../util":55,"./bk":49}],51:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,slack=require("./util").slack;module.exports=feasibleTree;function feasibleTree(g){var t=new Graph({directed:false});var start=g.nodes()[0],size=g.nodeCount();t.setNode(start,{});var edge,delta;while(tightTree(t,g)<size){edge=findMinSlackEdge(t,g);delta=t.hasNode(edge.v)?slack(g,edge):-slack(g,edge);shiftRanks(t,g,delta)}return t}function tightTree(t,g){function dfs(v){_.each(g.nodeEdges(v),function(e){var edgeV=e.v,w=v===edgeV?e.w:edgeV;if(!t.hasNode(w)&&!slack(g,e)){t.setNode(w,{});t.setEdge(v,w,{});dfs(w)}})}_.each(t.nodes(),dfs);return t.nodeCount()}function findMinSlackEdge(t,g){return _.min(g.edges(),function(e){if(t.hasNode(e.v)!==t.hasNode(e.w)){return slack(g,e)}})}function shiftRanks(t,g,delta){_.each(t.nodes(),function(v){g.node(v).rank+=delta})}},{"../graphlib":33,"../lodash":36,"./util":54}],52:[function(require,module,exports){"use strict";var rankUtil=require("./util"),longestPath=rankUtil.longestPath,feasibleTree=require("./feasible-tree"),networkSimplex=require("./network-simplex");module.exports=rank;function rank(g){switch(g.graph().ranker){case"network-simplex":networkSimplexRanker(g);break;case"tight-tree":tightTreeRanker(g);break;case"longest-path":longestPathRanker(g);break;default:networkSimplexRanker(g)}}var longestPathRanker=longestPath;function 
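/* positionX above (module 49, "bk") is the Brandes-Koepf horizontal coordinate
 * pass: it builds four candidate alignments (up/down x left/right), shifts them
 * onto the narrowest one, and takes the balanced median of the four x values
 * for each node; positionY simply stacks layers vertically, separated by
 * graph().ranksep. */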
tightTreeRanker(g){longestPath(g);feasibleTree(g)}function networkSimplexRanker(g){networkSimplex(g)}},{"./feasible-tree":51,"./network-simplex":53,"./util":54}],53:[function(require,module,exports){"use strict";var _=require("../lodash"),feasibleTree=require("./feasible-tree"),slack=require("./util").slack,initRank=require("./util").longestPath,preorder=require("../graphlib").alg.preorder,postorder=require("../graphlib").alg.postorder,simplify=require("../util").simplify;module.exports=networkSimplex;networkSimplex.initLowLimValues=initLowLimValues;networkSimplex.initCutValues=initCutValues;networkSimplex.calcCutValue=calcCutValue;networkSimplex.leaveEdge=leaveEdge;networkSimplex.enterEdge=enterEdge;networkSimplex.exchangeEdges=exchangeEdges;function networkSimplex(g){g=simplify(g);initRank(g);var t=feasibleTree(g);initLowLimValues(t);initCutValues(t,g);var e,f;while(e=leaveEdge(t)){f=enterEdge(t,g,e);exchangeEdges(t,g,e,f)}}function initCutValues(t,g){var vs=postorder(t,t.nodes());vs=vs.slice(0,vs.length-1);_.each(vs,function(v){assignCutValue(t,g,v)})}function assignCutValue(t,g,child){var childLab=t.node(child),parent=childLab.parent;t.edge(child,parent).cutvalue=calcCutValue(t,g,child)}function calcCutValue(t,g,child){var childLab=t.node(child),parent=childLab.parent,childIsTail=true,graphEdge=g.edge(child,parent),cutValue=0;if(!graphEdge){childIsTail=false;graphEdge=g.edge(parent,child)}cutValue=graphEdge.weight;_.each(g.nodeEdges(child),function(e){var isOutEdge=e.v===child,other=isOutEdge?e.w:e.v;if(other!==parent){var pointsToHead=isOutEdge===childIsTail,otherWeight=g.edge(e).weight;cutValue+=pointsToHead?otherWeight:-otherWeight;if(isTreeEdge(t,child,other)){var otherCutValue=t.edge(child,other).cutvalue;cutValue+=pointsToHead?-otherCutValue:otherCutValue}}});return cutValue}function initLowLimValues(tree,root){if(arguments.length<2){root=tree.nodes()[0]}dfsAssignLowLim(tree,{},1,root)}function dfsAssignLowLim(tree,visited,nextLim,v,parent){var low=nextLim,label=tree.node(v);visited[v]=true;_.each(tree.neighbors(v),function(w){if(!_.has(visited,w)){nextLim=dfsAssignLowLim(tree,visited,nextLim,w,v)}});label.low=low;label.lim=nextLim++;if(parent){label.parent=parent}else{delete label.parent}return nextLim}function leaveEdge(tree){return _.find(tree.edges(),function(e){return tree.edge(e).cutvalue<0})}function enterEdge(t,g,edge){var v=edge.v,w=edge.w; if(!g.hasEdge(v,w)){v=edge.w;w=edge.v}var vLabel=t.node(v),wLabel=t.node(w),tailLabel=vLabel,flip=false;if(vLabel.lim>wLabel.lim){tailLabel=wLabel;flip=true}var candidates=_.filter(g.edges(),function(edge){return flip===isDescendant(t,t.node(edge.v),tailLabel)&&flip!==isDescendant(t,t.node(edge.w),tailLabel)});return _.min(candidates,function(edge){return slack(g,edge)})}function exchangeEdges(t,g,e,f){var v=e.v,w=e.w;t.removeEdge(v,w);t.setEdge(f.v,f.w,{});initLowLimValues(t);initCutValues(t,g);updateRanks(t,g)}function updateRanks(t,g){var root=_.find(t.nodes(),function(v){return!g.node(v).parent}),vs=preorder(t,root);vs=vs.slice(1);_.each(vs,function(v){var parent=t.node(v).parent,edge=g.edge(v,parent),flipped=false;if(!edge){edge=g.edge(parent,v);flipped=true}g.node(v).rank=g.node(parent).rank+(flipped?edge.minlen:-edge.minlen)})}function isTreeEdge(tree,u,v){return tree.hasEdge(u,v)}function isDescendant(tree,vLabel,rootLabel){return rootLabel.low<=vLabel.lim&&vLabel.lim<=rootLabel.lim}},{"../graphlib":33,"../lodash":36,"../util":55,"./feasible-tree":51,"./util":54}],54:[function(require,module,exports){"use strict";var 
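/* Ranking (modules 51-54): longestPath produces an initial feasible ranking,
 * feasibleTree tightens it into a spanning tree whose edges all have zero
 * slack, and networkSimplex then repeatedly swaps a tree edge with negative
 * cut value for the minimum-slack non-tree edge until none remains, minimizing
 * total weighted edge length. */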
_=require("../lodash");module.exports={longestPath:longestPath,slack:slack};function longestPath(g){var visited={};function dfs(v){var label=g.node(v);if(_.has(visited,v)){return label.rank}visited[v]=true;var rank=_.min(_.map(g.outEdges(v),function(e){return dfs(e.w)-g.edge(e).minlen}));if(rank===Number.POSITIVE_INFINITY){rank=0}return label.rank=rank}_.each(g.sources(),dfs)}function slack(g,e){return g.node(e.w).rank-g.node(e.v).rank-g.edge(e).minlen}},{"../lodash":36}],55:[function(require,module,exports){"use strict";var _=require("./lodash"),Graph=require("./graphlib").Graph;module.exports={addDummyNode:addDummyNode,simplify:simplify,asNonCompoundGraph:asNonCompoundGraph,successorWeights:successorWeights,predecessorWeights:predecessorWeights,intersectRect:intersectRect,buildLayerMatrix:buildLayerMatrix,normalizeRanks:normalizeRanks,removeEmptyRanks:removeEmptyRanks,addBorderNode:addBorderNode,maxRank:maxRank,partition:partition,time:time,notime:notime};function addDummyNode(g,type,attrs,name){var v;do{v=_.uniqueId(name)}while(g.hasNode(v));attrs.dummy=type;g.setNode(v,attrs);return v}function simplify(g){var simplified=(new Graph).setGraph(g.graph());_.each(g.nodes(),function(v){simplified.setNode(v,g.node(v))});_.each(g.edges(),function(e){var simpleLabel=simplified.edge(e.v,e.w)||{weight:0,minlen:1},label=g.edge(e);simplified.setEdge(e.v,e.w,{weight:simpleLabel.weight+label.weight,minlen:Math.max(simpleLabel.minlen,label.minlen)})});return simplified}function asNonCompoundGraph(g){var simplified=new Graph({multigraph:g.isMultigraph()}).setGraph(g.graph());_.each(g.nodes(),function(v){if(!g.children(v).length){simplified.setNode(v,g.node(v))}});_.each(g.edges(),function(e){simplified.setEdge(e,g.edge(e))});return simplified}function successorWeights(g){var weightMap=_.map(g.nodes(),function(v){var sucs={};_.each(g.outEdges(v),function(e){sucs[e.w]=(sucs[e.w]||0)+g.edge(e).weight});return sucs});return _.zipObject(g.nodes(),weightMap)}function predecessorWeights(g){var weightMap=_.map(g.nodes(),function(v){var preds={};_.each(g.inEdges(v),function(e){preds[e.v]=(preds[e.v]||0)+g.edge(e).weight});return preds});return _.zipObject(g.nodes(),weightMap)}function intersectRect(rect,point){var x=rect.x;var y=rect.y;var dx=point.x-x;var dy=point.y-y;var w=rect.width/2;var h=rect.height/2;if(!dx&&!dy){throw new Error("Not possible to find intersection inside of the rectangle")}var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=w*dy/dx}return{x:x+sx,y:y+sy}}function buildLayerMatrix(g){var layering=_.map(_.range(maxRank(g)+1),function(){return[]});_.each(g.nodes(),function(v){var node=g.node(v),rank=node.rank;if(!_.isUndefined(rank)){layering[rank][node.order]=v}});return layering}function normalizeRanks(g){var min=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"rank")){node.rank-=min}})}function removeEmptyRanks(g){var offset=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));var layers=[];_.each(g.nodes(),function(v){var rank=g.node(v).rank-offset;if(!_.has(layers,rank)){layers[rank]=[]}layers[rank].push(v)});var delta=0,nodeRankFactor=g.graph().nodeRankFactor;_.each(layers,function(vs,i){if(_.isUndefined(vs)&&i%nodeRankFactor!==0){--delta}else if(delta){_.each(vs,function(v){g.node(v).rank+=delta})}})}function addBorderNode(g,prefix,rank,order){var node={width:0,height:0};if(arguments.length>=4){node.rank=rank;node.order=order}return 
addDummyNode(g,"border",node,prefix)}function maxRank(g){return _.max(_.map(g.nodes(),function(v){var rank=g.node(v).rank;if(!_.isUndefined(rank)){return rank}}))}function partition(collection,fn){var result={lhs:[],rhs:[]};_.each(collection,function(value){if(fn(value)){result.lhs.push(value)}else{result.rhs.push(value)}});return result}function time(name,fn){var start=_.now();try{return fn()}finally{console.log(name+" time: "+(_.now()-start)+"ms")}}function notime(name,fn){return fn()}},{"./graphlib":33,"./lodash":36}],56:[function(require,module,exports){module.exports="0.7.1"},{}],57:[function(require,module,exports){var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":73,"./lib/alg":64,"./lib/json":74}],58:[function(require,module,exports){var _=require("../lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{"../lodash":75}],59:[function(require,module,exports){var _=require("../lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{"../lodash":75}],60:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("../lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"../lodash":75,"./dijkstra":61}],61:[function(require,module,exports){var _=require("../lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. 
"+"Bad edge: "+edge+" Weight: "+weight)}if(distance<wEntry.distance){wEntry.distance=distance;wEntry.predecessor=v;pq.decrease(w,distance)}};g.nodes().forEach(function(v){var distance=v===source?0:Number.POSITIVE_INFINITY;results[v]={distance:distance};pq.add(v,distance)});while(pq.size()>0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":71,"../lodash":75}],62:[function(require,module,exports){var _=require("../lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"../lodash":75,"./tarjan":69}],63:[function(require,module,exports){var _=require("../lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance<ij.distance){ij.distance=altDistance;ij.predecessor=kj.predecessor}})})});return results}},{"../lodash":75}],64:[function(require,module,exports){module.exports={components:require("./components"),dijkstra:require("./dijkstra"),dijkstraAll:require("./dijkstra-all"),findCycles:require("./find-cycles"),floydWarshall:require("./floyd-warshall"),isAcyclic:require("./is-acyclic"),postorder:require("./postorder"),preorder:require("./preorder"),prim:require("./prim"),tarjan:require("./tarjan"),topsort:require("./topsort")}},{"./components":58,"./dijkstra":61,"./dijkstra-all":60,"./find-cycles":62,"./floyd-warshall":63,"./is-acyclic":65,"./postorder":66,"./preorder":67,"./prim":68,"./tarjan":69,"./topsort":70}],65:[function(require,module,exports){var topsort=require("./topsort");module.exports=isAcyclic;function isAcyclic(g){try{topsort(g)}catch(e){if(e instanceof topsort.CycleException){return false}throw e}return true}},{"./topsort":70}],66:[function(require,module,exports){var dfs=require("./dfs");module.exports=postorder;function postorder(g,vs){return dfs(g,vs,"post")}},{"./dfs":59}],67:[function(require,module,exports){var dfs=require("./dfs");module.exports=preorder;function preorder(g,vs){return dfs(g,vs,"pre")}},{"./dfs":59}],68:[function(require,module,exports){var _=require("../lodash"),Graph=require("../graph"),PriorityQueue=require("../data/priority-queue");module.exports=prim;function prim(g,weightFunc){var result=new Graph,parents={},pq=new PriorityQueue,v;function updateNeighbors(edge){var w=edge.v===v?edge.w:edge.v,pri=pq.priority(w);if(pri!==undefined){var edgeWeight=weightFunc(edge);if(edgeWeight<pri){parents[w]=v;pq.decrease(w,edgeWeight)}}}if(g.nodeCount()===0){return result}_.each(g.nodes(),function(v){pq.add(v,Number.POSITIVE_INFINITY);result.setNode(v)});pq.decrease(g.nodes()[0],0);var init=false;while(pq.size()>0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not 
connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":71,"../graph":72,"../lodash":75}],69:[function(require,module,exports){var _=require("../lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{"../lodash":75}],70:[function(require,module,exports){var _=require("../lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{"../lodash":75}],71:[function(require,module,exports){var _=require("../lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. 
"+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l<arr.length){largest=arr[l].priority<arr[largest].priority?l:largest;if(r<arr.length){largest=arr[r].priority<arr[largest].priority?r:largest}if(largest!==i){this._swap(i,largest);this._heapify(largest)}}};PriorityQueue.prototype._decrease=function(index){var arr=this._arr;var priority=arr[index].priority;var parent;while(index!==0){parent=index>>1;if(arr[parent].priority<priority){break}this._swap(index,parent);index=parent}};PriorityQueue.prototype._swap=function(i,j){var arr=this._arr;var keyIndices=this._keyIndices;var origArrI=arr[i];var origArrJ=arr[j];arr[i]=origArrJ;arr[j]=origArrI;keyIndices[origArrJ.key]=i;keyIndices[origArrI.key]=j}},{"../lodash":75}],72:[function(require,module,exports){"use strict";var _=require("./lodash");module.exports=Graph;var DEFAULT_EDGE_NAME="\x00",GRAPH_NODE="\x00",EDGE_KEY_DELIM="";function Graph(opts){this._isDirected=_.has(opts,"directed")?opts.directed:true;this._isMultigraph=_.has(opts,"multigraph")?opts.multigraph:false;this._isCompound=_.has(opts,"compound")?opts.compound:false;this._label=undefined;this._defaultNodeLabelFn=_.constant(undefined);this._defaultEdgeLabelFn=_.constant(undefined);this._nodes={};if(this._isCompound){this._parent={};this._children={};this._children[GRAPH_NODE]={}}this._in={};this._preds={};this._out={};this._sucs={};this._edgeObjs={};this._edgeLabels={}}Graph.prototype._nodeCount=0;Graph.prototype._edgeCount=0;Graph.prototype.isDirected=function(){return this._isDirected};Graph.prototype.isMultigraph=function(){return this._isMultigraph};Graph.prototype.isCompound=function(){return this._isCompound};Graph.prototype.setGraph=function(label){this._label=label;return this};Graph.prototype.graph=function(){return this._label};Graph.prototype.setDefaultNodeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultNodeLabelFn=newDefault;return this};Graph.prototype.nodeCount=function(){return this._nodeCount};Graph.prototype.nodes=function(){return _.keys(this._nodes)};Graph.prototype.sources=function(){return _.filter(this.nodes(),function(v){return _.isEmpty(this._in[v])},this)};Graph.prototype.sinks=function(){return _.filter(this.nodes(),function(v){return _.isEmpty(this._out[v])},this)};Graph.prototype.setNodes=function(vs,value){var args=arguments;_.each(vs,function(v){if(args.length>1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete 
this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return _.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(){var v,w,name,value,valueSpecified=false;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arguments[0];w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(!_.isUndefined(name)){name=""+name}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete 
this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return _.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{"./lodash":75}],73:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":72,"./version":76}],74:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":72,"./lodash":75}],75:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],76:[function(require,module,exports){module.exports="1.0.1"},{}],77:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f \ufeff"+"\n\r\u2028\u2029"+" ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var 
contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index<length){if(array[index]===value){return index}}return-1}function cacheIndexOf(cache,value){var type=typeof value;cache=cache.cache;if(type=="boolean"||value==null){return cache[value]?0:-1}if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value;cache=(cache=cache[type])&&cache[key];return type=="object"?cache&&baseIndexOf(cache,value)>-1?0:-1:cache?0:-1}function cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++index<length){var value=ac[index],other=bc[index];if(value!==other){if(value>other||typeof value=="undefined"){return 1}if(value<other||typeof other=="undefined"){return-1}}}return a.index-b.index}function createCache(array){var index=-1,length=array.length,first=array[0],mid=array[length/2|0],last=array[length-1];if(first&&typeof first=="object"&&mid&&typeof mid=="object"&&last&&typeof last=="object"){return false}var cache=getObject();cache["false"]=cache["null"]=cache["true"]=cache["undefined"]=false;var result=getObject();result.array=array;result.cache=cache;result.push=cachePush;while(++index<length){result.push(array[index])}return result}function escapeStringChar(match){return"\\"+stringEscapes[match]}function getArray(){return arrayPool.pop()||[]}function getObject(){return objectPool.pop()||{array:null,cache:null,criteria:null,"false":false,index:0,"null":false,number:null,object:null,push:null,string:null,"true":false,undefined:false,value:null}}function 
releaseArray(array){array.length=0;if(arrayPool.length<maxPoolSize){arrayPool.push(array)}}function releaseObject(object){var cache=object.cache;if(cache){releaseObject(cache)}object.array=object.cache=object.criteria=object.object=object.number=object.string=object.value=null;if(objectPool.length<maxPoolSize){objectPool.push(object)}}function slice(array,start,end){start||(start=0);if(typeof end=="undefined"){end=array?array.length:0}var index=-1,length=end-start||0,result=Array(length<0?0:length);while(++index<length){result[index]=array[start+index]}return result}function runInContext(context){context=context?_.defaults(root.Object(),context,_.pick(root,contextProps)):root;var Array=context.Array,Boolean=context.Boolean,Date=context.Date,Function=context.Function,Math=context.Math,Number=context.Number,Object=context.Object,RegExp=context.RegExp,String=context.String,TypeError=context.TypeError;var arrayRef=[];var objectProto=Object.prototype;var oldDash=context._;var toString=objectProto.toString;var reNative=RegExp("^"+String(toString).replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/toString| for [^\]]+/g,".*?")+"$");var ceil=Math.ceil,clearTimeout=context.clearTimeout,floor=Math.floor,fnToString=Function.prototype.toString,getPrototypeOf=isNative(getPrototypeOf=Object.getPrototypeOf)&&getPrototypeOf,hasOwnProperty=objectProto.hasOwnProperty,push=arrayRef.push,setTimeout=context.setTimeout,splice=arrayRef.splice,unshift=arrayRef.unshift;var defineProperty=function(){try{var o={},func=isNative(func=Object.defineProperty)&&func,result=func(o,o,o)&&func}catch(e){}return result}();var nativeCreate=isNative(nativeCreate=Object.create)&&nativeCreate,nativeIsArray=isNative(nativeIsArray=Array.isArray)&&nativeIsArray,nativeIsFinite=context.isFinite,nativeIsNaN=context.isNaN,nativeKeys=isNative(nativeKeys=Object.keys)&&nativeKeys,nativeMax=Math.max,nativeMin=Math.min,nativeParseInt=context.parseInt,nativeRandom=Math.random;var ctorByClass={};ctorByClass[arrayClass]=Array;ctorByClass[boolClass]=Boolean;ctorByClass[dateClass]=Date;ctorByClass[funcClass]=Function;ctorByClass[objectClass]=Object;ctorByClass[numberClass]=Number;ctorByClass[regexpClass]=RegExp;ctorByClass[stringClass]=String;function lodash(value){return value&&typeof value=="object"&&!isArray(value)&&hasOwnProperty.call(value,"__wrapped__")?value:new lodashWrapper(value)}function lodashWrapper(value,chainAll){this.__chain__=!!chainAll;this.__wrapped__=value}lodashWrapper.prototype=lodash.prototype;var support=lodash.support={};support.funcDecomp=!isNative(context.WinRTError)&&reThis.test(runInContext);support.funcNames=typeof Function.name=="string";lodash.templateSettings={escape:/<%-([\s\S]+?)%>/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case 
dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return isObject(prototype)?nativeCreate(prototype):{}; diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index af276e7b8d40c..f78fbaf33f656 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -678,7 +678,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli * * Note: Return statements are NOT allowed in the given body. */ - private def withScope[U](body: => U): U = RDDOperationScope.withScope[U](this)(body) + private[spark] def withScope[U](body: => U): U = RDDOperationScope.withScope[U](this)(body) // Methods for creating RDDs diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala index 2725826f421f4..6b09dfafc889c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala @@ -24,7 +24,7 @@ import com.fasterxml.jackson.annotation.JsonInclude.Include import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import org.apache.spark.SparkContext +import org.apache.spark.{Logging, SparkContext} /** * A general, named code block representing an operation that instantiates RDDs. @@ -43,9 +43,8 @@ import org.apache.spark.SparkContext @JsonPropertyOrder(Array("id", "name", "parent")) private[spark] class RDDOperationScope( val name: String, - val parent: Option[RDDOperationScope] = None) { - - val id: Int = RDDOperationScope.nextScopeId() + val parent: Option[RDDOperationScope] = None, + val id: String = RDDOperationScope.nextScopeId().toString) { def toJson: String = { RDDOperationScope.jsonMapper.writeValueAsString(this) @@ -75,7 +74,7 @@ private[spark] class RDDOperationScope( * A collection of utility methods to construct a hierarchical representation of RDD scopes. * An RDD scope tracks the series of operations that created a given RDD. */ -private[spark] object RDDOperationScope { +private[spark] object RDDOperationScope extends Logging { private val jsonMapper = new ObjectMapper().registerModule(DefaultScalaModule) private val scopeCounter = new AtomicInteger(0) @@ -88,14 +87,25 @@ private[spark] object RDDOperationScope { /** * Execute the given body such that all RDDs created in this body will have the same scope. - * The name of the scope will be the name of the method that immediately encloses this one. 
+ * The name of the scope will be the first method name in the stack trace that is not the + * same as this method's. * * Note: Return statements are NOT allowed in body. */ private[spark] def withScope[T]( sc: SparkContext, allowNesting: Boolean = false)(body: => T): T = { - val callerMethodName = Thread.currentThread.getStackTrace()(3).getMethodName + val stackTrace = Thread.currentThread.getStackTrace().tail // ignore "Thread#getStackTrace" + val ourMethodName = stackTrace(1).getMethodName // i.e. withScope + // Climb upwards to find the first method that's called something different + val callerMethodName = stackTrace + .find(_.getMethodName != ourMethodName) + .map(_.getMethodName) + .getOrElse { + // Log a warning just in case, but this should almost certainly never happen + logWarning("No valid method name for this RDD operation scope!") + "N/A" + } withScope[T](sc, callerMethodName, allowNesting, ignoreParent = false)(body) } diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala index 33a7303be711c..d6a5085db1efb 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala @@ -116,8 +116,8 @@ private[ui] object RDDOperationGraph extends Logging { // which may be nested inside of other clusters val rddScopes = rdd.scope.map { scope => scope.getAllScopes }.getOrElse(Seq.empty) val rddClusters = rddScopes.map { scope => - val clusterId = scope.name + "_" + scope.id - val clusterName = scope.name + val clusterId = scope.id + val clusterName = scope.name.replaceAll("\\n", "\\\\n") clusters.getOrElseUpdate(clusterId, new RDDOperationCluster(clusterId, clusterName)) } // Build the cluster hierarchy for this RDD @@ -177,7 +177,7 @@ private[ui] object RDDOperationGraph extends Logging { /** Return the dot representation of a node in an RDDOperationGraph. */ private def makeDotNode(node: RDDOperationNode): String = { - s"""${node.id} [label="${node.name} (${node.id})"]""" + s"""${node.id} [label="${node.name} [${node.id}]"]""" } /** Return the dot representation of a subgraph in an RDDOperationGraph. */ diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala index db465a6a9eb55..4434ed858c60c 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala @@ -22,13 +22,13 @@ import org.scalatest.{BeforeAndAfter, FunSuite} import org.apache.spark.{TaskContext, Partition, SparkContext} /** - * + * Tests whether scopes are passed from the RDD operation to the RDDs correctly. 
*/ class RDDOperationScopeSuite extends FunSuite with BeforeAndAfter { private var sc: SparkContext = null private val scope1 = new RDDOperationScope("scope1") - private val scope2 = new RDDOperationScope("scope2", parent = Some(scope1)) - private val scope3 = new RDDOperationScope("scope3", parent = Some(scope2)) + private val scope2 = new RDDOperationScope("scope2", Some(scope1)) + private val scope3 = new RDDOperationScope("scope3", Some(scope2)) before { sc = new SparkContext("local", "test") @@ -48,9 +48,9 @@ class RDDOperationScopeSuite extends FunSuite with BeforeAndAfter { val scope1Json = scope1.toJson val scope2Json = scope2.toJson val scope3Json = scope3.toJson - assert(scope1Json === s"""{"id":${scope1.id},"name":"scope1"}""") - assert(scope2Json === s"""{"id":${scope2.id},"name":"scope2","parent":$scope1Json}""") - assert(scope3Json === s"""{"id":${scope3.id},"name":"scope3","parent":$scope2Json}""") + assert(scope1Json === s"""{"id":"${scope1.id}","name":"scope1"}""") + assert(scope2Json === s"""{"id":"${scope2.id}","name":"scope2","parent":$scope1Json}""") + assert(scope3Json === s"""{"id":"${scope3.id}","name":"scope3","parent":$scope2Json}""") assert(RDDOperationScope.fromJson(scope1Json) === scope1) assert(RDDOperationScope.fromJson(scope2Json) === scope2) assert(RDDOperationScope.fromJson(scope3Json) === scope3) diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala index 6715aede7928a..060c2f23eded8 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala @@ -65,6 +65,9 @@ class DirectKafkaInputDStream[ val maxRetries = context.sparkContext.getConf.getInt( "spark.streaming.kafka.maxRetries", 1) + // Keep this consistent with how other streams are named (e.g. 
"Flume polling stream [2]") + private[streaming] override def name: String = s"Kafka direct stream [$id]" + protected[streaming] override val checkpointData = new DirectKafkaInputDStreamCheckpointData diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala index d7cf500577c2a..8be2707528d93 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala @@ -189,7 +189,7 @@ object KafkaUtils { sc: SparkContext, kafkaParams: Map[String, String], offsetRanges: Array[OffsetRange] - ): RDD[(K, V)] = { + ): RDD[(K, V)] = sc.withScope { val messageHandler = (mmd: MessageAndMetadata[K, V]) => (mmd.key, mmd.message) val leaders = leadersForRanges(kafkaParams, offsetRanges) new KafkaRDD[K, V, KD, VD, (K, V)](sc, kafkaParams, offsetRanges, leaders, messageHandler) @@ -224,7 +224,7 @@ object KafkaUtils { offsetRanges: Array[OffsetRange], leaders: Map[TopicAndPartition, Broker], messageHandler: MessageAndMetadata[K, V] => R - ): RDD[R] = { + ): RDD[R] = sc.withScope { val leaderMap = if (leaders.isEmpty) { leadersForRanges(kafkaParams, offsetRanges) } else { @@ -233,7 +233,8 @@ object KafkaUtils { case (tp: TopicAndPartition, Broker(host, port)) => (tp, (host, port)) }.toMap } - new KafkaRDD[K, V, KD, VD, R](sc, kafkaParams, offsetRanges, leaderMap, messageHandler) + val cleanedHandler = sc.clean(messageHandler) + new KafkaRDD[K, V, KD, VD, R](sc, kafkaParams, offsetRanges, leaderMap, cleanedHandler) } /** @@ -256,7 +257,7 @@ object KafkaUtils { valueDecoderClass: Class[VD], kafkaParams: JMap[String, String], offsetRanges: Array[OffsetRange] - ): JavaPairRDD[K, V] = { + ): JavaPairRDD[K, V] = jsc.sc.withScope { implicit val keyCmt: ClassTag[K] = ClassTag(keyClass) implicit val valueCmt: ClassTag[V] = ClassTag(valueClass) implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass) @@ -294,7 +295,7 @@ object KafkaUtils { offsetRanges: Array[OffsetRange], leaders: JMap[TopicAndPartition, Broker], messageHandler: JFunction[MessageAndMetadata[K, V], R] - ): JavaRDD[R] = { + ): JavaRDD[R] = jsc.sc.withScope { implicit val keyCmt: ClassTag[K] = ClassTag(keyClass) implicit val valueCmt: ClassTag[V] = ClassTag(valueClass) implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass) @@ -348,8 +349,9 @@ object KafkaUtils { fromOffsets: Map[TopicAndPartition, Long], messageHandler: MessageAndMetadata[K, V] => R ): InputDStream[R] = { + val cleanedHandler = ssc.sc.clean(messageHandler) new DirectKafkaInputDStream[K, V, KD, VD, R]( - ssc, kafkaParams, fromOffsets, messageHandler) + ssc, kafkaParams, fromOffsets, cleanedHandler) } /** @@ -469,11 +471,12 @@ object KafkaUtils { implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass) implicit val valueDecoderCmt: ClassTag[VD] = ClassTag(valueDecoderClass) implicit val recordCmt: ClassTag[R] = ClassTag(recordClass) + val cleanedHandler = jssc.sparkContext.clean(messageHandler.call _) createDirectStream[K, V, KD, VD, R]( jssc.ssc, Map(kafkaParams.toSeq: _*), Map(fromOffsets.mapValues { _.longValue() }.toSeq: _*), - messageHandler.call _ + cleanedHandler ) } diff --git a/external/mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala b/external/mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala index 3c0ef94cb0fab..40f5f18547236 100644 --- 
a/external/mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala +++ b/external/mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala @@ -35,7 +35,6 @@ import org.eclipse.paho.client.mqttv3.MqttMessage import org.eclipse.paho.client.mqttv3.MqttTopic import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence -import org.apache.spark.Logging import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream._ @@ -57,6 +56,8 @@ class MQTTInputDStream( storageLevel: StorageLevel ) extends ReceiverInputDStream[String](ssc_) { + private[streaming] override def name: String = s"MQTT stream [$id]" + def getReceiver(): Receiver[String] = { new MQTTReceiver(brokerUrl, topic, storageLevel) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 1d2ecdd341813..7f181bcecd4bf 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -34,7 +34,7 @@ import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.spark._ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.input.FixedLengthBinaryInputFormat -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{RDD, RDDOperationScope} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContextState._ import org.apache.spark.streaming.dstream._ @@ -241,15 +241,34 @@ class StreamingContext private[streaming] ( private[streaming] def getNewInputStreamId() = nextInputStreamId.getAndIncrement() + /** + * Execute a block of code in a scope such that all new DStreams created in this body will + * be part of the same scope. For more detail, see the comments in `doCompute`. + * + * Note: Return statements are NOT allowed in the given body. + */ + private[streaming] def withScope[U](body: => U): U = sparkContext.withScope(body) + + /** + * Execute a block of code in a scope such that all new DStreams created in this body will + * be part of the same scope. For more detail, see the comments in `doCompute`. + * + * Note: Return statements are NOT allowed in the given body. + */ + private[streaming] def withNamedScope[U](name: String)(body: => U): U = { + RDDOperationScope.withScope(sc, name, allowNesting = false, ignoreParent = false)(body) + } + /** * Create an input stream with any arbitrary user implemented receiver. 
* Find more details at: http://spark.apache.org/docs/latest/streaming-custom-receivers.html * @param receiver Custom implementation of Receiver */ @deprecated("Use receiverStream", "1.0.0") - def networkStream[T: ClassTag]( - receiver: Receiver[T]): ReceiverInputDStream[T] = { - receiverStream(receiver) + def networkStream[T: ClassTag](receiver: Receiver[T]): ReceiverInputDStream[T] = { + withNamedScope("network stream") { + receiverStream(receiver) + } } /** @@ -257,9 +276,10 @@ class StreamingContext private[streaming] ( * Find more details at: http://spark.apache.org/docs/latest/streaming-custom-receivers.html * @param receiver Custom implementation of Receiver */ - def receiverStream[T: ClassTag]( - receiver: Receiver[T]): ReceiverInputDStream[T] = { - new PluggableInputDStream[T](this, receiver) + def receiverStream[T: ClassTag](receiver: Receiver[T]): ReceiverInputDStream[T] = { + withNamedScope("receiver stream") { + new PluggableInputDStream[T](this, receiver) + } } /** @@ -279,7 +299,7 @@ class StreamingContext private[streaming] ( name: String, storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2, supervisorStrategy: SupervisorStrategy = ActorSupervisorStrategy.defaultStrategy - ): ReceiverInputDStream[T] = { + ): ReceiverInputDStream[T] = withNamedScope("actor stream") { receiverStream(new ActorReceiver[T](props, name, storageLevel, supervisorStrategy)) } @@ -296,7 +316,7 @@ class StreamingContext private[streaming] ( hostname: String, port: Int, storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2 - ): ReceiverInputDStream[String] = { + ): ReceiverInputDStream[String] = withNamedScope("socket text stream") { socketStream[String](hostname, port, SocketReceiver.bytesToLines, storageLevel) } @@ -334,7 +354,7 @@ class StreamingContext private[streaming] ( hostname: String, port: Int, storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2 - ): ReceiverInputDStream[T] = { + ): ReceiverInputDStream[T] = withNamedScope("raw socket stream") { new RawInputDStream[T](this, hostname, port, storageLevel) } @@ -408,7 +428,7 @@ class StreamingContext private[streaming] ( * file system. File names starting with . are ignored. * @param directory HDFS directory to monitor for new file */ - def textFileStream(directory: String): DStream[String] = { + def textFileStream(directory: String): DStream[String] = withNamedScope("text file stream") { fileStream[LongWritable, Text, TextInputFormat](directory).map(_._2.toString) } @@ -430,7 +450,7 @@ class StreamingContext private[streaming] ( @Experimental def binaryRecordsStream( directory: String, - recordLength: Int): DStream[Array[Byte]] = { + recordLength: Int): DStream[Array[Byte]] = withNamedScope("binary records stream") { val conf = sc_.hadoopConfiguration conf.setInt(FixedLengthBinaryInputFormat.RECORD_LENGTH_PROPERTY, recordLength) val br = fileStream[LongWritable, BytesWritable, FixedLengthBinaryInputFormat]( @@ -477,7 +497,7 @@ class StreamingContext private[streaming] ( /** * Create a unified DStream from multiple DStreams of the same type and same slide duration. 
*/ - def union[T: ClassTag](streams: Seq[DStream[T]]): DStream[T] = { + def union[T: ClassTag](streams: Seq[DStream[T]]): DStream[T] = withScope { new UnionDStream[T](streams.toArray) } @@ -488,7 +508,7 @@ class StreamingContext private[streaming] ( def transform[T: ClassTag]( dstreams: Seq[DStream[_]], transformFunc: (Seq[RDD[_]], Time) => RDD[T] - ): DStream[T] = { + ): DStream[T] = withScope { new TransformedDStream[T](dstreams, sparkContext.clean(transformFunc)) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index 64de7526a6a34..5977481e1f081 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -25,12 +25,13 @@ import scala.language.implicitConversions import scala.reflect.ClassTag import scala.util.matching.Regex -import org.apache.spark.{Logging, SparkException} -import org.apache.spark.rdd.{BlockRDD, PairRDDFunctions, RDD} +import org.apache.spark.{Logging, SparkContext, SparkException} +import org.apache.spark.rdd.{BlockRDD, PairRDDFunctions, RDD, RDDOperationScope} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.StreamingContext.rddToFileName import org.apache.spark.streaming.scheduler.Job +import org.apache.spark.streaming.ui.UIUtils import org.apache.spark.util.{CallSite, MetadataCleaner, Utils} /** @@ -73,7 +74,7 @@ abstract class DStream[T: ClassTag] ( def dependencies: List[DStream[_]] /** Method that generates a RDD for the given time */ - def compute (validTime: Time): Option[RDD[T]] + def compute(validTime: Time): Option[RDD[T]] // ======================================================================= // Methods and fields available on all DStreams @@ -111,6 +112,44 @@ abstract class DStream[T: ClassTag] ( /* Set the creation call site */ private[streaming] val creationSite = DStream.getCreationSite() + /** + * The base scope associated with the operation that created this DStream. + * + * This is the medium through which we pass the DStream operation name (e.g. updatedStateByKey) + * to the RDDs created by this DStream. Note that we never use this scope directly in RDDs. + * Instead, we instantiate a new scope during each call to `compute` based on this one. + * + * This is not defined if the DStream is created outside of one of the public DStream operations. + */ + protected[streaming] val baseScope: Option[String] = { + Option(ssc.sc.getLocalProperty(SparkContext.RDD_SCOPE_KEY)) + } + + /** + * Make a scope that groups RDDs created in the same DStream operation in the same batch. + * + * Each DStream produces many scopes and each scope may be shared by other DStreams created + * in the same operation. Separate calls to the same DStream operation create separate scopes. + * For instance, `dstream.map(...).map(...)` creates two separate scopes per batch. + */ + private def makeScope(time: Time): Option[RDDOperationScope] = { + baseScope.map { bsJson => + val formattedBatchTime = UIUtils.formatBatchTime( + time.milliseconds, ssc.graph.batchDuration.milliseconds, showYYYYMMSS = false) + val bs = RDDOperationScope.fromJson(bsJson) + val baseName = bs.name // e.g. 
countByWindow, "kafka stream [0]" + val scopeName = + if (baseName.length > 10) { + // If the operation name is too long, wrap the line + s"$baseName\n@ $formattedBatchTime" + } else { + s"$baseName @ $formattedBatchTime" + } + val scopeId = s"${bs.id}_${time.milliseconds}" + new RDDOperationScope(scopeName, id = scopeId) + } + } + /** Persist the RDDs of this DStream with the given storage level */ def persist(level: StorageLevel): DStream[T] = { if (this.isInitialized) { @@ -295,28 +334,23 @@ abstract class DStream[T: ClassTag] ( * Get the RDD corresponding to the given time; either retrieve it from cache * or compute-and-cache it. */ - private[streaming] def getOrCompute(time: Time): Option[RDD[T]] = { + private[streaming] final def getOrCompute(time: Time): Option[RDD[T]] = { // If RDD was already generated, then retrieve it from HashMap, // or else compute the RDD generatedRDDs.get(time).orElse { // Compute the RDD if time is valid (e.g. correct time in a sliding window) // of RDD generation, else generate nothing. if (isTimeValid(time)) { - // Set the thread-local property for call sites to this DStream's creation site - // such that RDDs generated by compute gets that as their creation site. - // Note that this `getOrCompute` may get called from another DStream which may have - // set its own call site. So we store its call site in a temporary variable, - // set this DStream's creation site, generate RDDs and then restore the previous call site. - val prevCallSite = ssc.sparkContext.getCallSite() - ssc.sparkContext.setCallSite(creationSite) - // Disable checks for existing output directories in jobs launched by the streaming - // scheduler, since we may need to write output to an existing directory during checkpoint - // recovery; see SPARK-4835 for more details. We need to have this call here because - // compute() might cause Spark jobs to be launched. - val rddOption = PairRDDFunctions.disableOutputSpecValidation.withValue(true) { - compute(time) + + val rddOption = createRDDWithLocalProperties(time) { + // Disable checks for existing output directories in jobs launched by the streaming + // scheduler, since we may need to write output to an existing directory during checkpoint + // recovery; see SPARK-4835 for more details. We need to have this call here because + // compute() might cause Spark jobs to be launched. + PairRDDFunctions.disableOutputSpecValidation.withValue(true) { + compute(time) + } } - ssc.sparkContext.setCallSite(prevCallSite) rddOption.foreach { case newRDD => // Register the generated RDD for caching and checkpointing @@ -337,6 +371,41 @@ abstract class DStream[T: ClassTag] ( } } + /** + * Wrap a body of code such that the call site and operation scope + * information are passed to the RDDs created in this body properly. + */ + protected def createRDDWithLocalProperties[U](time: Time)(body: => U): U = { + val scopeKey = SparkContext.RDD_SCOPE_KEY + val scopeNoOverrideKey = SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY + // Pass this DStream's operation scope and creation site information to RDDs through + // thread-local properties in our SparkContext. Since this method may be called from another + // DStream, we need to temporarily store any old scope and creation site information to + // restore them later after setting our own. 
+ val prevCallSite = ssc.sparkContext.getCallSite() + val prevScope = ssc.sparkContext.getLocalProperty(scopeKey) + val prevScopeNoOverride = ssc.sparkContext.getLocalProperty(scopeNoOverrideKey) + + try { + ssc.sparkContext.setCallSite(creationSite) + // Use the DStream's base scope for this RDD so we can (1) preserve the higher level + // DStream operation name, and (2) share this scope with other DStreams created in the + // same operation. Disallow nesting so that low-level Spark primitives do not show up. + // TODO: merge callsites with scopes so we can just reuse the code there + makeScope(time).foreach { s => + ssc.sparkContext.setLocalProperty(scopeKey, s.toJson) + ssc.sparkContext.setLocalProperty(scopeNoOverrideKey, "true") + } + + body + } finally { + // Restore any state that was modified before returning + ssc.sparkContext.setCallSite(prevCallSite) + ssc.sparkContext.setLocalProperty(scopeKey, prevScope) + ssc.sparkContext.setLocalProperty(scopeNoOverrideKey, prevScopeNoOverride) + } + } + /** * Generate a SparkStreaming job for the given time. This is an internal method that * should not be called directly. This default implementation creates a job @@ -456,7 +525,7 @@ abstract class DStream[T: ClassTag] ( // ======================================================================= /** Return a new DStream by applying a function to all elements of this DStream. */ - def map[U: ClassTag](mapFunc: T => U): DStream[U] = { + def map[U: ClassTag](mapFunc: T => U): DStream[U] = ssc.withScope { new MappedDStream(this, context.sparkContext.clean(mapFunc)) } @@ -464,26 +533,31 @@ abstract class DStream[T: ClassTag] ( * Return a new DStream by applying a function to all elements of this DStream, * and then flattening the results */ - def flatMap[U: ClassTag](flatMapFunc: T => Traversable[U]): DStream[U] = { + def flatMap[U: ClassTag](flatMapFunc: T => Traversable[U]): DStream[U] = ssc.withScope { new FlatMappedDStream(this, context.sparkContext.clean(flatMapFunc)) } /** Return a new DStream containing only the elements that satisfy a predicate. */ - def filter(filterFunc: T => Boolean): DStream[T] = new FilteredDStream(this, filterFunc) + def filter(filterFunc: T => Boolean): DStream[T] = ssc.withScope { + new FilteredDStream(this, filterFunc) + } /** * Return a new DStream in which each RDD is generated by applying glom() to each RDD of * this DStream. Applying glom() to an RDD coalesces all elements within each partition into * an array. */ - def glom(): DStream[Array[T]] = new GlommedDStream(this) - + def glom(): DStream[Array[T]] = ssc.withScope { + new GlommedDStream(this) + } /** * Return a new DStream with an increased or decreased level of parallelism. Each RDD in the * returned DStream has exactly numPartitions partitions. 
*/ - def repartition(numPartitions: Int): DStream[T] = this.transform(_.repartition(numPartitions)) + def repartition(numPartitions: Int): DStream[T] = ssc.withScope { + this.transform(_.repartition(numPartitions)) + } /** * Return a new DStream in which each RDD is generated by applying mapPartitions() to each RDDs @@ -493,7 +567,7 @@ abstract class DStream[T: ClassTag] ( def mapPartitions[U: ClassTag]( mapPartFunc: Iterator[T] => Iterator[U], preservePartitioning: Boolean = false - ): DStream[U] = { + ): DStream[U] = ssc.withScope { new MapPartitionedDStream(this, context.sparkContext.clean(mapPartFunc), preservePartitioning) } @@ -501,14 +575,15 @@ abstract class DStream[T: ClassTag] ( * Return a new DStream in which each RDD has a single element generated by reducing each RDD * of this DStream. */ - def reduce(reduceFunc: (T, T) => T): DStream[T] = + def reduce(reduceFunc: (T, T) => T): DStream[T] = ssc.withScope { this.map(x => (null, x)).reduceByKey(reduceFunc, 1).map(_._2) + } /** * Return a new DStream in which each RDD has a single element generated by counting each RDD * of this DStream. */ - def count(): DStream[Long] = { + def count(): DStream[Long] = ssc.withScope { this.map(_ => (null, 1L)) .transform(_.union(context.sparkContext.makeRDD(Seq((null, 0L)), 1))) .reduceByKey(_ + _) @@ -522,15 +597,16 @@ abstract class DStream[T: ClassTag] ( * `numPartitions` not specified). */ def countByValue(numPartitions: Int = ssc.sc.defaultParallelism)(implicit ord: Ordering[T] = null) - : DStream[(T, Long)] = + : DStream[(T, Long)] = ssc.withScope { this.map(x => (x, 1L)).reduceByKey((x: Long, y: Long) => x + y, numPartitions) + } /** * Apply a function to each RDD in this DStream. This is an output operator, so * 'this' DStream will be registered as an output stream and therefore materialized. */ @deprecated("use foreachRDD", "0.9.0") - def foreach(foreachFunc: RDD[T] => Unit): Unit = { + def foreach(foreachFunc: RDD[T] => Unit): Unit = ssc.withScope { this.foreachRDD(foreachFunc) } @@ -539,7 +615,7 @@ abstract class DStream[T: ClassTag] ( * 'this' DStream will be registered as an output stream and therefore materialized. */ @deprecated("use foreachRDD", "0.9.0") - def foreach(foreachFunc: (RDD[T], Time) => Unit): Unit = { + def foreach(foreachFunc: (RDD[T], Time) => Unit): Unit = ssc.withScope { this.foreachRDD(foreachFunc) } @@ -547,7 +623,7 @@ abstract class DStream[T: ClassTag] ( * Apply a function to each RDD in this DStream. This is an output operator, so * 'this' DStream will be registered as an output stream and therefore materialized. */ - def foreachRDD(foreachFunc: RDD[T] => Unit) { + def foreachRDD(foreachFunc: RDD[T] => Unit): Unit = ssc.withScope { this.foreachRDD((r: RDD[T], t: Time) => foreachFunc(r)) } @@ -555,7 +631,7 @@ abstract class DStream[T: ClassTag] ( * Apply a function to each RDD in this DStream. This is an output operator, so * 'this' DStream will be registered as an output stream and therefore materialized. */ - def foreachRDD(foreachFunc: (RDD[T], Time) => Unit) { + def foreachRDD(foreachFunc: (RDD[T], Time) => Unit): Unit = ssc.withScope { // because the DStream is reachable from the outer object here, and because // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean @@ -566,7 +642,7 @@ abstract class DStream[T: ClassTag] ( * Return a new DStream in which each RDD is generated by applying a function * on each RDD of 'this' DStream. 
*/ - def transform[U: ClassTag](transformFunc: RDD[T] => RDD[U]): DStream[U] = { + def transform[U: ClassTag](transformFunc: RDD[T] => RDD[U]): DStream[U] = ssc.withScope { // because the DStream is reachable from the outer object here, and because // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean @@ -578,7 +654,7 @@ abstract class DStream[T: ClassTag] ( * Return a new DStream in which each RDD is generated by applying a function * on each RDD of 'this' DStream. */ - def transform[U: ClassTag](transformFunc: (RDD[T], Time) => RDD[U]): DStream[U] = { + def transform[U: ClassTag](transformFunc: (RDD[T], Time) => RDD[U]): DStream[U] = ssc.withScope { // because the DStream is reachable from the outer object here, and because // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean @@ -596,7 +672,7 @@ abstract class DStream[T: ClassTag] ( */ def transformWith[U: ClassTag, V: ClassTag]( other: DStream[U], transformFunc: (RDD[T], RDD[U]) => RDD[V] - ): DStream[V] = { + ): DStream[V] = ssc.withScope { // because the DStream is reachable from the outer object here, and because // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean @@ -610,7 +686,7 @@ abstract class DStream[T: ClassTag] ( */ def transformWith[U: ClassTag, V: ClassTag]( other: DStream[U], transformFunc: (RDD[T], RDD[U], Time) => RDD[V] - ): DStream[V] = { + ): DStream[V] = ssc.withScope { // because the DStream is reachable from the outer object here, and because // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean @@ -628,7 +704,7 @@ abstract class DStream[T: ClassTag] ( * Print the first ten elements of each RDD generated in this DStream. This is an output * operator, so this DStream will be registered as an output stream and there materialized. */ - def print() { + def print(): Unit = ssc.withScope { print(10) } @@ -636,7 +712,7 @@ abstract class DStream[T: ClassTag] ( * Print the first num elements of each RDD generated in this DStream. This is an output * operator, so this DStream will be registered as an output stream and there materialized. 
*/ - def print(num: Int) { + def print(num: Int): Unit = ssc.withScope { def foreachFunc: (RDD[T], Time) => Unit = { (rdd: RDD[T], time: Time) => { val firstNum = rdd.take(num + 1) @@ -668,7 +744,7 @@ abstract class DStream[T: ClassTag] ( * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval */ - def window(windowDuration: Duration, slideDuration: Duration): DStream[T] = { + def window(windowDuration: Duration, slideDuration: Duration): DStream[T] = ssc.withScope { new WindowedDStream(this, windowDuration, slideDuration) } @@ -686,7 +762,7 @@ abstract class DStream[T: ClassTag] ( reduceFunc: (T, T) => T, windowDuration: Duration, slideDuration: Duration - ): DStream[T] = { + ): DStream[T] = ssc.withScope { this.reduce(reduceFunc).window(windowDuration, slideDuration).reduce(reduceFunc) } @@ -711,7 +787,7 @@ abstract class DStream[T: ClassTag] ( invReduceFunc: (T, T) => T, windowDuration: Duration, slideDuration: Duration - ): DStream[T] = { + ): DStream[T] = ssc.withScope { this.map(x => (1, x)) .reduceByKeyAndWindow(reduceFunc, invReduceFunc, windowDuration, slideDuration, 1) .map(_._2) @@ -727,7 +803,9 @@ abstract class DStream[T: ClassTag] ( * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval */ - def countByWindow(windowDuration: Duration, slideDuration: Duration): DStream[Long] = { + def countByWindow( + windowDuration: Duration, + slideDuration: Duration): DStream[Long] = ssc.withScope { this.map(_ => 1L).reduceByWindow(_ + _, _ - _, windowDuration, slideDuration) } @@ -748,8 +826,7 @@ abstract class DStream[T: ClassTag] ( slideDuration: Duration, numPartitions: Int = ssc.sc.defaultParallelism) (implicit ord: Ordering[T] = null) - : DStream[(T, Long)] = - { + : DStream[(T, Long)] = ssc.withScope { this.map(x => (x, 1L)).reduceByKeyAndWindow( (x: Long, y: Long) => x + y, (x: Long, y: Long) => x - y, @@ -764,19 +841,21 @@ abstract class DStream[T: ClassTag] ( * Return a new DStream by unifying data of another DStream with this DStream. * @param that Another DStream having the same slideDuration as this DStream. */ - def union(that: DStream[T]): DStream[T] = new UnionDStream[T](Array(this, that)) + def union(that: DStream[T]): DStream[T] = ssc.withScope { + new UnionDStream[T](Array(this, that)) + } /** * Return all the RDDs defined by the Interval object (both end times included) */ - def slice(interval: Interval): Seq[RDD[T]] = { + def slice(interval: Interval): Seq[RDD[T]] = ssc.withScope { slice(interval.beginTime, interval.endTime) } /** * Return all the RDDs between 'fromTime' to 'toTime' (both included) */ - def slice(fromTime: Time, toTime: Time): Seq[RDD[T]] = { + def slice(fromTime: Time, toTime: Time): Seq[RDD[T]] = ssc.withScope { if (!isInitialized) { throw new SparkException(this + " has not been initialized") } @@ -810,7 +889,7 @@ abstract class DStream[T: ClassTag] ( * The file name at each batch interval is generated based on `prefix` and * `suffix`: "prefix-TIME_IN_MS.suffix". */ - def saveAsObjectFiles(prefix: String, suffix: String = "") { + def saveAsObjectFiles(prefix: String, suffix: String = ""): Unit = ssc.withScope { val saveFunc = (rdd: RDD[T], time: Time) => { val file = rddToFileName(prefix, suffix, time) rdd.saveAsObjectFile(file) @@ -823,7 +902,7 @@ abstract class DStream[T: ClassTag] ( * of elements. The file name at each batch interval is generated based on * `prefix` and `suffix`: "prefix-TIME_IN_MS.suffix". 
*/ - def saveAsTextFiles(prefix: String, suffix: String = "") { + def saveAsTextFiles(prefix: String, suffix: String = ""): Unit = ssc.withScope { val saveFunc = (rdd: RDD[T], time: Time) => { val file = rddToFileName(prefix, suffix, time) rdd.saveAsTextFile(file) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala index 685a32e1d280d..c109ceccc6989 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala @@ -37,7 +37,7 @@ class ForEachDStream[T: ClassTag] ( override def generateJob(time: Time): Option[Job] = { parent.getOrCompute(time) match { case Some(rdd) => - val jobFunc = () => { + val jobFunc = () => createRDDWithLocalProperties(time) { ssc.sparkContext.setCallSite(creationSite) foreachFunc(rdd, time) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala index 9716adb62817c..d58c99a8ff321 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala @@ -17,10 +17,13 @@ package org.apache.spark.streaming.dstream -import org.apache.spark.streaming.{Time, Duration, StreamingContext} - import scala.reflect.ClassTag +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDDOperationScope +import org.apache.spark.streaming.{Time, Duration, StreamingContext} +import org.apache.spark.util.Utils + /** * This is the abstract base class for all input streams. This class provides methods * start() and stop() which is called by Spark Streaming system to start and stop receiving data. @@ -44,10 +47,31 @@ abstract class InputDStream[T: ClassTag] (@transient ssc_ : StreamingContext) /** This is an unique identifier for the input stream. */ val id = ssc.getNewInputStreamId() + /** A human-readable name of this InputDStream */ + private[streaming] def name: String = { + // e.g. FlumePollingDStream -> "Flume polling stream" + val newName = Utils.getFormattedClassName(this) + .replaceAll("InputDStream", "Stream") + .split("(?=[A-Z])") + .filter(_.nonEmpty) + .mkString(" ") + .toLowerCase + .capitalize + s"$newName [$id]" + } + /** - * The name of this InputDStream. By default, it's the class name with its id. + * The base scope associated with the operation that created this DStream. + * + * For InputDStreams, we use the name of this DStream as the scope name. + * If an outer scope is given, we assume that it includes an alternative name for this stream. */ - private[streaming] def name: String = s"${getClass.getSimpleName}-$id" + protected[streaming] override val baseScope: Option[String] = { + val scopeName = Option(ssc.sc.getLocalProperty(SparkContext.RDD_SCOPE_KEY)) + .map { json => RDDOperationScope.fromJson(json).name + s" [$id]" } + .getOrElse(name.toLowerCase) + Some(new RDDOperationScope(scopeName).toJson) + } /** * Checks whether the 'time' is valid wrt slideDuration for generating RDD. 
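The new `name` helper in the InputDStream hunk above turns a class name such as FlumePollingInputDStream into "Flume polling stream [id]" for display. A standalone reproduction of just that string transformation; the object and method names below are made up for illustration:

object InputStreamNameSketch {
  // Mirrors the formatting in the hunk above: "FlumePollingInputDStream" -> "Flume polling stream".
  def humanReadableName(className: String, id: Int): String = {
    val newName = className
      .replaceAll("InputDStream", "Stream")
      .split("(?=[A-Z])")   // split before every capital letter
      .filter(_.nonEmpty)
      .mkString(" ")
      .toLowerCase
      .capitalize
    s"$newName [$id]"
  }

  def main(args: Array[String]): Unit = {
    println(humanReadableName("FlumePollingInputDStream", 0))  // Flume polling stream [0]
    println(humanReadableName("KafkaInputDStream", 1))         // Kafka stream [1]
  }
}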
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala index 8a58571632447..884a8e8b52289 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala @@ -46,7 +46,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to * generate the RDDs with Spark's default number of partitions. */ - def groupByKey(): DStream[(K, Iterable[V])] = { + def groupByKey(): DStream[(K, Iterable[V])] = ssc.withScope { groupByKey(defaultPartitioner()) } @@ -54,7 +54,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to * generate the RDDs with `numPartitions` partitions. */ - def groupByKey(numPartitions: Int): DStream[(K, Iterable[V])] = { + def groupByKey(numPartitions: Int): DStream[(K, Iterable[V])] = ssc.withScope { groupByKey(defaultPartitioner(numPartitions)) } @@ -62,7 +62,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying `groupByKey` on each RDD. The supplied * org.apache.spark.Partitioner is used to control the partitioning of each RDD. */ - def groupByKey(partitioner: Partitioner): DStream[(K, Iterable[V])] = { + def groupByKey(partitioner: Partitioner): DStream[(K, Iterable[V])] = ssc.withScope { val createCombiner = (v: V) => ArrayBuffer[V](v) val mergeValue = (c: ArrayBuffer[V], v: V) => (c += v) val mergeCombiner = (c1: ArrayBuffer[V], c2: ArrayBuffer[V]) => (c1 ++ c2) @@ -75,7 +75,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * merged using the associative reduce function. Hash partitioning is used to generate the RDDs * with Spark's default number of partitions. */ - def reduceByKey(reduceFunc: (V, V) => V): DStream[(K, V)] = { + def reduceByKey(reduceFunc: (V, V) => V): DStream[(K, V)] = ssc.withScope { reduceByKey(reduceFunc, defaultPartitioner()) } @@ -84,7 +84,9 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * merged using the supplied reduce function. Hash partitioning is used to generate the RDDs * with `numPartitions` partitions. */ - def reduceByKey(reduceFunc: (V, V) => V, numPartitions: Int): DStream[(K, V)] = { + def reduceByKey( + reduceFunc: (V, V) => V, + numPartitions: Int): DStream[(K, V)] = ssc.withScope { reduceByKey(reduceFunc, defaultPartitioner(numPartitions)) } @@ -93,7 +95,9 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * merged using the supplied reduce function. org.apache.spark.Partitioner is used to control * the partitioning of each RDD. */ - def reduceByKey(reduceFunc: (V, V) => V, partitioner: Partitioner): DStream[(K, V)] = { + def reduceByKey( + reduceFunc: (V, V) => V, + partitioner: Partitioner): DStream[(K, V)] = ssc.withScope { val cleanedReduceFunc = ssc.sc.clean(reduceFunc) combineByKey((v: V) => v, cleanedReduceFunc, cleanedReduceFunc, partitioner) } @@ -104,11 +108,11 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * org.apache.spark.rdd.PairRDDFunctions in the Spark core documentation for more information. 
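The hunks above express groupByKey and reduceByKey in terms of combineByKey. A small sketch of the same construction on plain batch RDDs (which is what a DStream produces per batch); the object name and sample data are illustrative:

import scala.collection.mutable.ArrayBuffer
import org.apache.spark.{SparkConf, SparkContext}

object CombineByKeySketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("CombineByKeySketch").setMaster("local[2]"))
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

    // reduceByKey via combineByKey, as in the hunk above: the first value is the initial
    // combiner, and the same function merges a value into a combiner and two combiners.
    val reduced = pairs.combineByKey[Int](
      (v: Int) => v,
      (c: Int, v: Int) => c + v,
      (c1: Int, c2: Int) => c1 + c2)

    // groupByKey via combineByKey, with an ArrayBuffer as the combiner.
    val grouped = pairs.combineByKey[ArrayBuffer[Int]](
      (v: Int) => ArrayBuffer(v),
      (buf: ArrayBuffer[Int], v: Int) => buf += v,
      (b1: ArrayBuffer[Int], b2: ArrayBuffer[Int]) => b1 ++= b2)

    println(reduced.collect().toSeq)   // e.g. (a,4), (b,2) in some order
    println(grouped.collect().toSeq)
    sc.stop()
  }
}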
*/ def combineByKey[C: ClassTag]( - createCombiner: V => C, - mergeValue: (C, V) => C, - mergeCombiner: (C, C) => C, - partitioner: Partitioner, - mapSideCombine: Boolean = true): DStream[(K, C)] = { + createCombiner: V => C, + mergeValue: (C, V) => C, + mergeCombiner: (C, C) => C, + partitioner: Partitioner, + mapSideCombine: Boolean = true): DStream[(K, C)] = ssc.withScope { new ShuffledDStream[K, V, C](self, createCombiner, mergeValue, mergeCombiner, partitioner, mapSideCombine) } @@ -121,7 +125,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * @param windowDuration width of the window; must be a multiple of this DStream's * batching interval */ - def groupByKeyAndWindow(windowDuration: Duration): DStream[(K, Iterable[V])] = { + def groupByKeyAndWindow(windowDuration: Duration): DStream[(K, Iterable[V])] = ssc.withScope { groupByKeyAndWindow(windowDuration, self.slideDuration, defaultPartitioner()) } @@ -136,8 +140,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * DStream's batching interval */ def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration) - : DStream[(K, Iterable[V])] = - { + : DStream[(K, Iterable[V])] = ssc.withScope { groupByKeyAndWindow(windowDuration, slideDuration, defaultPartitioner()) } @@ -157,7 +160,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) windowDuration: Duration, slideDuration: Duration, numPartitions: Int - ): DStream[(K, Iterable[V])] = { + ): DStream[(K, Iterable[V])] = ssc.withScope { groupByKeyAndWindow(windowDuration, slideDuration, defaultPartitioner(numPartitions)) } @@ -176,7 +179,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) windowDuration: Duration, slideDuration: Duration, partitioner: Partitioner - ): DStream[(K, Iterable[V])] = { + ): DStream[(K, Iterable[V])] = ssc.withScope { val createCombiner = (v: Iterable[V]) => new ArrayBuffer[V] ++= v val mergeValue = (buf: ArrayBuffer[V], v: Iterable[V]) => buf ++= v val mergeCombiner = (buf1: ArrayBuffer[V], buf2: ArrayBuffer[V]) => buf1 ++= buf2 @@ -198,7 +201,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def reduceByKeyAndWindow( reduceFunc: (V, V) => V, windowDuration: Duration - ): DStream[(K, V)] = { + ): DStream[(K, V)] = ssc.withScope { reduceByKeyAndWindow(reduceFunc, windowDuration, self.slideDuration, defaultPartitioner()) } @@ -217,7 +220,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) reduceFunc: (V, V) => V, windowDuration: Duration, slideDuration: Duration - ): DStream[(K, V)] = { + ): DStream[(K, V)] = ssc.withScope { reduceByKeyAndWindow(reduceFunc, windowDuration, slideDuration, defaultPartitioner()) } @@ -238,7 +241,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) windowDuration: Duration, slideDuration: Duration, numPartitions: Int - ): DStream[(K, V)] = { + ): DStream[(K, V)] = ssc.withScope { reduceByKeyAndWindow(reduceFunc, windowDuration, slideDuration, defaultPartitioner(numPartitions)) } @@ -260,7 +263,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) windowDuration: Duration, slideDuration: Duration, partitioner: Partitioner - ): DStream[(K, V)] = { + ): DStream[(K, V)] = ssc.withScope { val cleanedReduceFunc = ssc.sc.clean(reduceFunc) self.reduceByKey(cleanedReduceFunc, partitioner) .window(windowDuration, slideDuration) @@ -294,8 +297,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) slideDuration: Duration = self.slideDuration, numPartitions: Int = ssc.sc.defaultParallelism, filterFunc: ((K, V)) => Boolean = null - ): DStream[(K, V)] = { - + ): 
DStream[(K, V)] = ssc.withScope { reduceByKeyAndWindow( reduceFunc, invReduceFunc, windowDuration, slideDuration, defaultPartitioner(numPartitions), filterFunc @@ -328,7 +330,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) slideDuration: Duration, partitioner: Partitioner, filterFunc: ((K, V)) => Boolean - ): DStream[(K, V)] = { + ): DStream[(K, V)] = ssc.withScope { val cleanedReduceFunc = ssc.sc.clean(reduceFunc) val cleanedInvReduceFunc = ssc.sc.clean(invReduceFunc) @@ -349,7 +351,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) */ def updateStateByKey[S: ClassTag]( updateFunc: (Seq[V], Option[S]) => Option[S] - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { updateStateByKey(updateFunc, defaultPartitioner()) } @@ -365,7 +367,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def updateStateByKey[S: ClassTag]( updateFunc: (Seq[V], Option[S]) => Option[S], numPartitions: Int - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { updateStateByKey(updateFunc, defaultPartitioner(numPartitions)) } @@ -382,7 +384,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def updateStateByKey[S: ClassTag]( updateFunc: (Seq[V], Option[S]) => Option[S], partitioner: Partitioner - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { val newUpdateFunc = (iterator: Iterator[(K, Seq[V], Option[S])]) => { iterator.flatMap(t => updateFunc(t._2, t._3).map(s => (t._1, s))) } @@ -406,7 +408,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) updateFunc: (Iterator[(K, Seq[V], Option[S])]) => Iterator[(K, S)], partitioner: Partitioner, rememberPartitioner: Boolean - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { new StateDStream(self, ssc.sc.clean(updateFunc), partitioner, rememberPartitioner, None) } @@ -425,7 +427,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) updateFunc: (Seq[V], Option[S]) => Option[S], partitioner: Partitioner, initialRDD: RDD[(K, S)] - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { val newUpdateFunc = (iterator: Iterator[(K, Seq[V], Option[S])]) => { iterator.flatMap(t => updateFunc(t._2, t._3).map(s => (t._1, s))) } @@ -451,7 +453,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) partitioner: Partitioner, rememberPartitioner: Boolean, initialRDD: RDD[(K, S)] - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { new StateDStream(self, ssc.sc.clean(updateFunc), partitioner, rememberPartitioner, Some(initialRDD)) } @@ -460,7 +462,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying a map function to the value of each key-value pairs in * 'this' DStream without changing the key. */ - def mapValues[U: ClassTag](mapValuesFunc: V => U): DStream[(K, U)] = { + def mapValues[U: ClassTag](mapValuesFunc: V => U): DStream[(K, U)] = ssc.withScope { new MapValuedDStream[K, V, U](self, mapValuesFunc) } @@ -470,7 +472,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) */ def flatMapValues[U: ClassTag]( flatMapValuesFunc: V => TraversableOnce[U] - ): DStream[(K, U)] = { + ): DStream[(K, U)] = ssc.withScope { new FlatMapValuedDStream[K, V, U](self, flatMapValuesFunc) } @@ -479,7 +481,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Hash partitioning is used to generate the RDDs with Spark's default number * of partitions. 
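A hedged usage sketch for updateStateByKey as declared above, keeping a running count per key. The master, the socket source, and the checkpoint path are placeholders:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object RunningCountSketch {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(
      new SparkConf().setAppName("RunningCountSketch").setMaster("local[2]"), Seconds(2))
    ssc.checkpoint("/tmp/running-count-checkpoint")  // per-key state needs checkpointing

    // Placeholder source; run `nc -lk 9999` locally to feed it.
    val words = ssc.socketTextStream("localhost", 9999).flatMap(_.split(" "))

    // Fold this batch's values for a key into the previous running total for that key.
    val updateFunc = (newValues: Seq[Int], state: Option[Int]) =>
      Option(newValues.sum + state.getOrElse(0))

    words.map(w => (w, 1)).updateStateByKey[Int](updateFunc).print()

    ssc.start()
    ssc.awaitTermination()
  }
}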
*/ - def cogroup[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (Iterable[V], Iterable[W]))] = { + def cogroup[W: ClassTag]( + other: DStream[(K, W)]): DStream[(K, (Iterable[V], Iterable[W]))] = ssc.withScope { cogroup(other, defaultPartitioner()) } @@ -487,8 +490,9 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying 'cogroup' between RDDs of `this` DStream and `other` DStream. * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. */ - def cogroup[W: ClassTag](other: DStream[(K, W)], numPartitions: Int) - : DStream[(K, (Iterable[V], Iterable[W]))] = { + def cogroup[W: ClassTag]( + other: DStream[(K, W)], + numPartitions: Int): DStream[(K, (Iterable[V], Iterable[W]))] = ssc.withScope { cogroup(other, defaultPartitioner(numPartitions)) } @@ -499,7 +503,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def cogroup[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (Iterable[V], Iterable[W]))] = { + ): DStream[(K, (Iterable[V], Iterable[W]))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.cogroup(rdd2, partitioner) @@ -510,7 +514,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying 'join' between RDDs of `this` DStream and `other` DStream. * Hash partitioning is used to generate the RDDs with Spark's default number of partitions. */ - def join[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (V, W))] = { + def join[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (V, W))] = ssc.withScope { join[W](other, defaultPartitioner()) } @@ -518,7 +522,9 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying 'join' between RDDs of `this` DStream and `other` DStream. * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. */ - def join[W: ClassTag](other: DStream[(K, W)], numPartitions: Int): DStream[(K, (V, W))] = { + def join[W: ClassTag]( + other: DStream[(K, W)], + numPartitions: Int): DStream[(K, (V, W))] = ssc.withScope { join[W](other, defaultPartitioner(numPartitions)) } @@ -529,7 +535,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def join[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (V, W))] = { + ): DStream[(K, (V, W))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.join(rdd2, partitioner) @@ -541,7 +547,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * `other` DStream. Hash partitioning is used to generate the RDDs with Spark's default * number of partitions. 
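The cogroup/join family above delegates to transformWith on the per-batch RDDs. A minimal usage sketch using in-memory queue streams so it runs without an external source; names and sample data are illustrative:

import scala.collection.mutable
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamJoinSketch {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(
      new SparkConf().setAppName("StreamJoinSketch").setMaster("local[2]"), Seconds(2))

    // Two small keyed streams fed from in-memory queues, just to have something joinable.
    val clickQueue = mutable.Queue[RDD[(String, String)]](
      ssc.sparkContext.parallelize(Seq("u1" -> "click", "u2" -> "click")))
    val viewQueue = mutable.Queue[RDD[(String, String)]](
      ssc.sparkContext.parallelize(Seq("u1" -> "view")))
    val clicks = ssc.queueStream(clickQueue)
    val views = ssc.queueStream(viewQueue)

    // Per batch, these join the corresponding RDDs, exactly as the transformWith-based
    // implementations above do.
    clicks.join(views).print()           // (u1,(click,view))
    clicks.leftOuterJoin(views).print()  // (u1,(click,Some(view))), (u2,(click,None))

    ssc.start()
    ssc.awaitTermination()  // stop with Ctrl-C or ssc.stop() in a real run
  }
}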
*/ - def leftOuterJoin[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (V, Option[W]))] = { + def leftOuterJoin[W: ClassTag]( + other: DStream[(K, W)]): DStream[(K, (V, Option[W]))] = ssc.withScope { leftOuterJoin[W](other, defaultPartitioner()) } @@ -553,7 +560,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def leftOuterJoin[W: ClassTag]( other: DStream[(K, W)], numPartitions: Int - ): DStream[(K, (V, Option[W]))] = { + ): DStream[(K, (V, Option[W]))] = ssc.withScope { leftOuterJoin[W](other, defaultPartitioner(numPartitions)) } @@ -565,7 +572,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def leftOuterJoin[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (V, Option[W]))] = { + ): DStream[(K, (V, Option[W]))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.leftOuterJoin(rdd2, partitioner) @@ -577,7 +584,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * `other` DStream. Hash partitioning is used to generate the RDDs with Spark's default * number of partitions. */ - def rightOuterJoin[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (Option[V], W))] = { + def rightOuterJoin[W: ClassTag]( + other: DStream[(K, W)]): DStream[(K, (Option[V], W))] = ssc.withScope { rightOuterJoin[W](other, defaultPartitioner()) } @@ -589,7 +597,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def rightOuterJoin[W: ClassTag]( other: DStream[(K, W)], numPartitions: Int - ): DStream[(K, (Option[V], W))] = { + ): DStream[(K, (Option[V], W))] = ssc.withScope { rightOuterJoin[W](other, defaultPartitioner(numPartitions)) } @@ -601,7 +609,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def rightOuterJoin[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (Option[V], W))] = { + ): DStream[(K, (Option[V], W))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.rightOuterJoin(rdd2, partitioner) @@ -613,7 +621,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * `other` DStream. Hash partitioning is used to generate the RDDs with Spark's default * number of partitions. 
*/ - def fullOuterJoin[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (Option[V], Option[W]))] = { + def fullOuterJoin[W: ClassTag]( + other: DStream[(K, W)]): DStream[(K, (Option[V], Option[W]))] = ssc.withScope { fullOuterJoin[W](other, defaultPartitioner()) } @@ -625,7 +634,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def fullOuterJoin[W: ClassTag]( other: DStream[(K, W)], numPartitions: Int - ): DStream[(K, (Option[V], Option[W]))] = { + ): DStream[(K, (Option[V], Option[W]))] = ssc.withScope { fullOuterJoin[W](other, defaultPartitioner(numPartitions)) } @@ -637,7 +646,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def fullOuterJoin[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (Option[V], Option[W]))] = { + ): DStream[(K, (Option[V], Option[W]))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.fullOuterJoin(rdd2, partitioner) @@ -651,7 +660,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def saveAsHadoopFiles[F <: OutputFormat[K, V]]( prefix: String, suffix: String - )(implicit fm: ClassTag[F]) { + )(implicit fm: ClassTag[F]): Unit = ssc.withScope { saveAsHadoopFiles(prefix, suffix, keyClass, valueClass, fm.runtimeClass.asInstanceOf[Class[F]]) } @@ -667,7 +676,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) valueClass: Class[_], outputFormatClass: Class[_ <: OutputFormat[_, _]], conf: JobConf = new JobConf(ssc.sparkContext.hadoopConfiguration) - ) { + ): Unit = ssc.withScope { // Wrap conf in SerializableWritable so that ForeachDStream can be serialized for checkpoints val serializableConf = new SerializableWritable(conf) val saveFunc = (rdd: RDD[(K, V)], time: Time) => { @@ -684,7 +693,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def saveAsNewAPIHadoopFiles[F <: NewOutputFormat[K, V]]( prefix: String, suffix: String - )(implicit fm: ClassTag[F]) { + )(implicit fm: ClassTag[F]): Unit = ssc.withScope { saveAsNewAPIHadoopFiles(prefix, suffix, keyClass, valueClass, fm.runtimeClass.asInstanceOf[Class[F]]) } @@ -700,7 +709,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) valueClass: Class[_], outputFormatClass: Class[_ <: NewOutputFormat[_, _]], conf: Configuration = ssc.sparkContext.hadoopConfiguration - ) { + ): Unit = ssc.withScope { // Wrap conf in SerializableWritable so that ForeachDStream can be serialized for checkpoints val serializableConf = new SerializableWritable(conf) val saveFunc = (rdd: RDD[(K, V)], time: Time) => { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala new file mode 100644 index 0000000000000..392933102097e --- /dev/null +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} + +import org.apache.spark.SparkContext +import org.apache.spark.rdd.{RDD, RDDOperationScope} +import org.apache.spark.streaming.dstream.{DStream, InputDStream} +import org.apache.spark.streaming.ui.UIUtils + +/** + * Tests whether scope information is passed from DStream operations to RDDs correctly. + */ +class DStreamScopeSuite extends FunSuite with BeforeAndAfter with BeforeAndAfterAll { + private var ssc: StreamingContext = null + private val batchDuration: Duration = Seconds(1) + + override def beforeAll(): Unit = { + ssc = new StreamingContext(new SparkContext("local", "test"), batchDuration) + } + + override def afterAll(): Unit = { + ssc.stop(stopSparkContext = true) + } + + before { assertPropertiesNotSet() } + after { assertPropertiesNotSet() } + + test("dstream without scope") { + val dummyStream = new DummyDStream(ssc) + dummyStream.initialize(Time(0)) + + // This DStream is not instantiated in any scope, so all RDDs + // created by this stream should similarly not have a scope + assert(dummyStream.baseScope === None) + assert(dummyStream.getOrCompute(Time(1000)).get.scope === None) + assert(dummyStream.getOrCompute(Time(2000)).get.scope === None) + assert(dummyStream.getOrCompute(Time(3000)).get.scope === None) + } + + test("input dstream without scope") { + val inputStream = new DummyInputDStream(ssc) + inputStream.initialize(Time(0)) + + val baseScope = inputStream.baseScope.map(RDDOperationScope.fromJson) + val scope1 = inputStream.getOrCompute(Time(1000)).get.scope + val scope2 = inputStream.getOrCompute(Time(2000)).get.scope + val scope3 = inputStream.getOrCompute(Time(3000)).get.scope + + // This DStream is not instantiated in any scope, so all RDDs + assertDefined(baseScope, scope1, scope2, scope3) + assert(baseScope.get.name.startsWith("dummy stream")) + assertScopeCorrect(baseScope.get, scope1.get, 1000) + assertScopeCorrect(baseScope.get, scope2.get, 2000) + assertScopeCorrect(baseScope.get, scope3.get, 3000) + } + + test("scoping simple operations") { + val inputStream = new DummyInputDStream(ssc) + val mappedStream = inputStream.map { i => i + 1 } + val filteredStream = mappedStream.filter { i => i % 2 == 0 } + filteredStream.initialize(Time(0)) + + val mappedScopeBase = mappedStream.baseScope.map(RDDOperationScope.fromJson) + val mappedScope1 = mappedStream.getOrCompute(Time(1000)).get.scope + val mappedScope2 = mappedStream.getOrCompute(Time(2000)).get.scope + val mappedScope3 = mappedStream.getOrCompute(Time(3000)).get.scope + val filteredScopeBase = filteredStream.baseScope.map(RDDOperationScope.fromJson) + val filteredScope1 = filteredStream.getOrCompute(Time(1000)).get.scope + val filteredScope2 = filteredStream.getOrCompute(Time(2000)).get.scope + val filteredScope3 = filteredStream.getOrCompute(Time(3000)).get.scope + + // These streams are defined in their respective scopes "map" and "filter", so all + // RDDs created by these streams should inherit the IDs and names of their parent + // DStream's base scopes + 
assertDefined(mappedScopeBase, mappedScope1, mappedScope2, mappedScope3) + assertDefined(filteredScopeBase, filteredScope1, filteredScope2, filteredScope3) + assert(mappedScopeBase.get.name === "map") + assert(filteredScopeBase.get.name === "filter") + assertScopeCorrect(mappedScopeBase.get, mappedScope1.get, 1000) + assertScopeCorrect(mappedScopeBase.get, mappedScope2.get, 2000) + assertScopeCorrect(mappedScopeBase.get, mappedScope3.get, 3000) + assertScopeCorrect(filteredScopeBase.get, filteredScope1.get, 1000) + assertScopeCorrect(filteredScopeBase.get, filteredScope2.get, 2000) + assertScopeCorrect(filteredScopeBase.get, filteredScope3.get, 3000) + } + + test("scoping nested operations") { + val inputStream = new DummyInputDStream(ssc) + val countStream = inputStream.countByWindow(Seconds(10), Seconds(1)) + countStream.initialize(Time(0)) + + val countScopeBase = countStream.baseScope.map(RDDOperationScope.fromJson) + val countScope1 = countStream.getOrCompute(Time(1000)).get.scope + val countScope2 = countStream.getOrCompute(Time(2000)).get.scope + val countScope3 = countStream.getOrCompute(Time(3000)).get.scope + + // Assert that all children RDDs inherit the DStream operation name correctly + assertDefined(countScopeBase, countScope1, countScope2, countScope3) + assert(countScopeBase.get.name === "countByWindow") + assertScopeCorrect(countScopeBase.get, countScope1.get, 1000) + assertScopeCorrect(countScopeBase.get, countScope2.get, 2000) + assertScopeCorrect(countScopeBase.get, countScope3.get, 3000) + + // All streams except the input stream should share the same scopes as `countStream` + def testStream(stream: DStream[_]): Unit = { + if (stream != inputStream) { + val myScopeBase = stream.baseScope.map(RDDOperationScope.fromJson) + val myScope1 = stream.getOrCompute(Time(1000)).get.scope + val myScope2 = stream.getOrCompute(Time(2000)).get.scope + val myScope3 = stream.getOrCompute(Time(3000)).get.scope + assertDefined(myScopeBase, myScope1, myScope2, myScope3) + assert(myScopeBase === countScopeBase) + assert(myScope1 === countScope1) + assert(myScope2 === countScope2) + assert(myScope3 === countScope3) + // Climb upwards to test the parent streams + stream.dependencies.foreach(testStream) + } + } + testStream(countStream) + } + + /** Assert that the RDD operation scope properties are not set in our SparkContext. */ + private def assertPropertiesNotSet(): Unit = { + assert(ssc != null) + assert(ssc.sc.getLocalProperty(SparkContext.RDD_SCOPE_KEY) == null) + assert(ssc.sc.getLocalProperty(SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY) == null) + } + + /** Assert that the given RDD scope inherits the name and ID of the base scope correctly. */ + private def assertScopeCorrect( + baseScope: RDDOperationScope, + rddScope: RDDOperationScope, + batchTime: Long): Unit = { + assertScopeCorrect(baseScope.id, baseScope.name, rddScope, batchTime) + } + + /** Assert that the given RDD scope inherits the base name and ID correctly. */ + private def assertScopeCorrect( + baseScopeId: String, + baseScopeName: String, + rddScope: RDDOperationScope, + batchTime: Long): Unit = { + val formattedBatchTime = UIUtils.formatBatchTime( + batchTime, ssc.graph.batchDuration.milliseconds, showYYYYMMSS = false) + assert(rddScope.id === s"${baseScopeId}_$batchTime") + assert(rddScope.name.replaceAll("\\n", " ") === s"$baseScopeName @ $formattedBatchTime") + } + + /** Assert that all the specified options are defined. 
*/ + private def assertDefined[T](options: Option[T]*): Unit = { + options.zipWithIndex.foreach { case (o, i) => assert(o.isDefined, s"Option $i was empty!") } + } + +} + +/** + * A dummy stream that does absolutely nothing. + */ +private class DummyDStream(ssc: StreamingContext) extends DStream[Int](ssc) { + override def dependencies: List[DStream[Int]] = List.empty + override def slideDuration: Duration = Seconds(1) + override def compute(time: Time): Option[RDD[Int]] = Some(ssc.sc.emptyRDD[Int]) +} + +/** + * A dummy input stream that does absolutely nothing. + */ +private class DummyInputDStream(ssc: StreamingContext) extends InputDStream[Int](ssc) { + override def start(): Unit = { } + override def stop(): Unit = { } + override def compute(time: Time): Option[RDD[Int]] = Some(ssc.sc.emptyRDD[Int]) +} From 6525fc0ab0c953c3346d16db0334aaf01ade7ed5 Mon Sep 17 00:00:00 2001 From: Jihong MA <linlin200605@gmail.com> Date: Mon, 18 May 2015 22:47:50 +0100 Subject: [PATCH 244/320] [SPARK-7063] when lz4 compression is used, it causes core dump this fix is to solve one issue found in lz4 1.2.0, which caused core dump in Spark Core with IBM JDK. that issue is fixed in lz4 1.3.0 version. Author: Jihong MA <linlin200605@gmail.com> Closes #6226 from JihongMA/SPARK-7063-1 and squashes the following commits: 0cca781 [Jihong MA] SPARK-7063 4559ed5 [Jihong MA] SPARK-7063 daa520f [Jihong MA] SPARK-7063 upgrade lz4 jars 71738ee [Jihong MA] Merge remote-tracking branch 'upstream/master' dfaa971 [Jihong MA] SPARK-7265 minor fix of the content ace454d [Jihong MA] SPARK-7265 take out PySpark on YARN limitation 9ea0832 [Jihong MA] Merge remote-tracking branch 'upstream/master' d5bf3f5 [Jihong MA] Merge remote-tracking branch 'upstream/master' 7b842e6 [Jihong MA] Merge remote-tracking branch 'upstream/master' 9c84695 [Jihong MA] SPARK-7265 address review comment a399aa6 [Jihong MA] SPARK-7265 Improving documentation for Spark SQL Hive support --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6f525b6ac81a3..c72d7cbf843ef 100644 --- a/pom.xml +++ b/pom.xml @@ -495,7 +495,7 @@ <dependency> <groupId>net.jpountz.lz4</groupId> <artifactId>lz4</artifactId> - <version>1.2.0</version> + <version>1.3.0</version> </dependency> <dependency> <groupId>com.clearspring.analytics</groupId> From eb4632f282d070e1dfd5ffed968fa212896137da Mon Sep 17 00:00:00 2001 From: Michael Armbrust <michael@databricks.com> Date: Mon, 18 May 2015 15:24:31 -0700 Subject: [PATCH 245/320] [SQL] Fix serializability of ORC table scan A follow-up to #6244. 
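The fix in the diff below marks `inputPaths` as `@transient`. As a general sketch of that technique (not the ORC code itself): a field that is only needed on the driver, and whose type is not serializable, can be excluded from task serialization while the rest of the object still ships with the closure. All class names here are made up:

import org.apache.spark.{SparkConf, SparkContext}

// A stand-in for something that cannot be serialized (like Hadoop's FileStatus in the patch).
class DriverOnlyHandle(val path: String)   // note: not Serializable

class TableScanLike(@transient val handle: DriverOnlyHandle, val columns: Seq[String])
  extends Serializable {

  def execute(sc: SparkContext): Array[String] = {
    // The closure references `columns`, so it captures `this`; without @transient on
    // `handle`, task serialization would fail with a NotSerializableException.
    sc.parallelize(1 to 3).map(i => s"row $i of ${columns.mkString(",")}").collect()
  }
}

object TransientFieldSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("TransientFieldSketch").setMaster("local[2]"))
    val scan = new TableScanLike(new DriverOnlyHandle("/tmp/input"), Seq("a", "b"))
    scan.execute(sc).foreach(println)
    sc.stop()
  }
}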
Author: Michael Armbrust <michael@databricks.com> Closes #6247 from marmbrus/fixOrcTests and squashes the following commits: e39ee1b [Michael Armbrust] [SQL] Fix serializability of ORC table scan --- .../main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala index e10d3a0b6846c..58b97adb46165 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala @@ -195,7 +195,7 @@ private[orc] case class OrcTableScan( attributes: Seq[Attribute], @transient relation: OrcRelation, filters: Array[Filter], - inputPaths: Array[FileStatus]) + @transient inputPaths: Array[FileStatus]) extends Logging with HiveInspectors { From 4fb52f9545ae338fae2d3aeea4bfc35d5df44853 Mon Sep 17 00:00:00 2001 From: Davies Liu <davies@databricks.com> Date: Mon, 18 May 2015 16:55:45 -0700 Subject: [PATCH 246/320] [SPARK-7624] Revert #4147 Author: Davies Liu <davies@databricks.com> Closes #6172 from davies/revert_4147 and squashes the following commits: 3bfbbde [Davies Liu] Revert #4147 --- .../spark/scheduler/local/LocalBackend.scala | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala index e64d06c4d3cfc..3078a1b10be8b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala @@ -18,14 +18,12 @@ package org.apache.spark.scheduler.local import java.nio.ByteBuffer -import java.util.concurrent.TimeUnit import org.apache.spark.{Logging, SparkConf, SparkContext, SparkEnv, TaskState} import org.apache.spark.TaskState.TaskState import org.apache.spark.executor.{Executor, ExecutorBackend} -import org.apache.spark.rpc.{ThreadSafeRpcEndpoint, RpcCallContext, RpcEndpointRef, RpcEnv} +import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.scheduler.{SchedulerBackend, TaskSchedulerImpl, WorkerOffer} -import org.apache.spark.util.{ThreadUtils, Utils} private case class ReviveOffers() @@ -47,9 +45,6 @@ private[spark] class LocalEndpoint( private val totalCores: Int) extends ThreadSafeRpcEndpoint with Logging { - private val reviveThread = - ThreadUtils.newDaemonSingleThreadScheduledExecutor("local-revive-thread") - private var freeCores = totalCores private val localExecutorId = SparkContext.DRIVER_IDENTIFIER @@ -79,27 +74,13 @@ private[spark] class LocalEndpoint( context.reply(true) } - def reviveOffers() { val offers = Seq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores)) - val tasks = scheduler.resourceOffers(offers).flatten - for (task <- tasks) { + for (task <- scheduler.resourceOffers(offers).flatten) { freeCores -= scheduler.CPUS_PER_TASK executor.launchTask(executorBackend, taskId = task.taskId, attemptNumber = task.attemptNumber, task.name, task.serializedTask) } - if (tasks.isEmpty && scheduler.activeTaskSets.nonEmpty) { - // Try to reviveOffer after 1 second, because scheduler may wait for locality timeout - reviveThread.schedule(new Runnable { - override def run(): Unit = Utils.tryLogNonFatalError { - Option(self).foreach(_.send(ReviveOffers)) - } - }, 1000, 
TimeUnit.MILLISECONDS) - } - } - - override def onStop(): Unit = { - reviveThread.shutdownNow() } } From 0a7a94eab5fba3d2f2ef14a70c2c1bf4ee21b626 Mon Sep 17 00:00:00 2001 From: jerluc <jeremyalucas@gmail.com> Date: Mon, 18 May 2015 18:13:29 -0700 Subject: [PATCH 247/320] [SPARK-7621] [STREAMING] Report Kafka errors to StreamingListeners PR per [SPARK-7621](https://issues.apache.org/jira/browse/SPARK-7621), which makes both `KafkaReceiver` and `ReliableKafkaReceiver` report its errors to the `ReceiverTracker`, which in turn will add the events to the bus to fire off any registered `StreamingListener`s. Author: jerluc <jeremyalucas@gmail.com> Closes #6204 from jerluc/master and squashes the following commits: 82439a5 [jerluc] [SPARK-7621] [STREAMING] Report Kafka errors to StreamingListeners --- .../org/apache/spark/streaming/kafka/KafkaInputDStream.scala | 2 +- .../apache/spark/streaming/kafka/ReliableKafkaReceiver.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala index cca0fac0234e1..04b2dc10d39ea 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala @@ -135,7 +135,7 @@ class KafkaReceiver[ store((msgAndMetadata.key, msgAndMetadata.message)) } } catch { - case e: Throwable => logError("Error handling message; exiting", e) + case e: Throwable => reportError("Error handling message; exiting", e) } } } diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala index ea87e960379f1..75f0dfc22b9dc 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala @@ -267,7 +267,7 @@ class ReliableKafkaReceiver[ } } catch { case e: Exception => - logError("Error handling message", e) + reportError("Error handling message", e) } } } From 3a6003866ade45974b43a9e785ec35fb76a32b99 Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Mon, 18 May 2015 18:24:15 -0700 Subject: [PATCH 248/320] [SPARK-7692] Updated Kinesis examples - Updated Kinesis examples to use stable API - Cleaned up comments, etc. - Renamed KinesisWordCountProducerASL to KinesisWordProducerASL Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6249 from tdas/kinesis-examples and squashes the following commits: 7cc307b [Tathagata Das] More tweaks f080872 [Tathagata Das] More cleanup 841987f [Tathagata Das] Small update 011cbe2 [Tathagata Das] More fixes b0d74f9 [Tathagata Das] Updated examples. 
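Relating to the SPARK-7621 change above, which replaces `logError` with `reportError` so receiver failures reach the ReceiverTracker and any registered StreamingListener: a sketch of a custom receiver and listener following the same pattern. The receiver class, its fake source, and the listener are hypothetical:

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.streaming.scheduler.{StreamingListener, StreamingListenerReceiverError}

// A custom receiver whose failures are surfaced through reportError(), so they are
// forwarded to the listener bus instead of only being logged.
class FlakySourceReceiver extends Receiver[String](StorageLevel.MEMORY_ONLY) {
  override def onStart(): Unit = {
    new Thread("flaky-source-receiver") {
      override def run(): Unit = {
        try {
          while (!isStopped()) {
            store("record from a hypothetical source")
            Thread.sleep(100)
          }
        } catch {
          case e: Throwable => reportError("Error handling message", e)
        }
      }
    }.start()
  }
  override def onStop(): Unit = { }
}

// A listener that can now observe those errors.
class ReceiverErrorListener extends StreamingListener {
  override def onReceiverError(event: StreamingListenerReceiverError): Unit = {
    println(s"Receiver error reported: ${event.receiverInfo}")
  }
}

// Wiring, given a StreamingContext `ssc`:
//   ssc.addStreamingListener(new ReceiverErrorListener)
//   val lines = ssc.receiverStream(new FlakySourceReceiver)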
--- .../streaming/JavaKinesisWordCountASL.java | 245 +++++++++-------- .../streaming/KinesisWordCountASL.scala | 260 ++++++++++-------- 2 files changed, 268 insertions(+), 237 deletions(-) diff --git a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java b/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java index b0bff27a61c19..06e0ff28afd95 100644 --- a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java +++ b/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java @@ -20,6 +20,7 @@ import java.util.List; import java.util.regex.Pattern; +import com.amazonaws.regions.RegionUtils; import org.apache.log4j.Logger; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -40,140 +41,146 @@ import com.google.common.collect.Lists; /** - * Java-friendly Kinesis Spark Streaming WordCount example + * Consumes messages from a Amazon Kinesis streams and does wordcount. * - * See http://spark.apache.org/docs/latest/streaming-kinesis.html for more details - * on the Kinesis Spark Streaming integration. + * This example spins up 1 Kinesis Receiver per shard for the given stream. + * It then starts pulling from the last checkpointed sequence number of the given stream. * - * This example spins up 1 Kinesis Worker (Spark Streaming Receiver) per shard - * for the given stream. - * It then starts pulling from the last checkpointed sequence number of the given - * <stream-name> and <endpoint-url>. + * Usage: JavaKinesisWordCountASL [app-name] [stream-name] [endpoint-url] [region-name] + * [app-name] is the name of the consumer app, used to track the read data in DynamoDB + * [stream-name] name of the Kinesis stream (ie. mySparkStream) + * [endpoint-url] endpoint of the Kinesis service + * (e.g. https://kinesis.us-east-1.amazonaws.com) * - * Valid endpoint urls: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region - * - * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials - * in the following order of precedence: - * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY - * Java System Properties - aws.accessKeyId and aws.secretKey - * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs - * Instance profile credentials - delivered through the Amazon EC2 metadata service - * - * Usage: JavaKinesisWordCountASL <stream-name> <endpoint-url> - * <stream-name> is the name of the Kinesis stream (ie. mySparkStream) - * <endpoint-url> is the endpoint of the Kinesis service - * (ie. https://kinesis.us-east-1.amazonaws.com) * * Example: - * $ export AWS_ACCESS_KEY_ID=<your-access-key> + * # export AWS keys if necessary + * $ export AWS_ACCESS_KEY_ID=[your-access-key] * $ export AWS_SECRET_KEY=<your-secret-key> - * $ $SPARK_HOME/bin/run-example \ - * org.apache.spark.examples.streaming.JavaKinesisWordCountASL mySparkStream \ - * https://kinesis.us-east-1.amazonaws.com * - * Note that number of workers/threads should be 1 more than the number of receivers. - * This leaves one thread available for actually processing the data. + * # run the example + * $ SPARK_HOME/bin/run-example streaming.JavaKinesisWordCountASL myAppName mySparkStream \ + * https://kinesis.us-east-1.amazonaws.com + * + * There is a companion helper class called KinesisWordProducerASL which puts dummy data + * onto the Kinesis stream. 
* - * There is a companion helper class called KinesisWordCountProducerASL which puts dummy data - * onto the Kinesis stream. - * Usage instructions for KinesisWordCountProducerASL are provided in the class definition. + * This code uses the DefaultAWSCredentialsProviderChain to find credentials + * in the following order: + * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY + * Java System Properties - aws.accessKeyId and aws.secretKey + * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs + * Instance profile credentials - delivered through the Amazon EC2 metadata service + * For more information, see + * http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html + * + * See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on + * the Kinesis Spark Streaming integration. */ public final class JavaKinesisWordCountASL { // needs to be public for access from run-example - private static final Pattern WORD_SEPARATOR = Pattern.compile(" "); - private static final Logger logger = Logger.getLogger(JavaKinesisWordCountASL.class); - - /* Make the constructor private to enforce singleton */ - private JavaKinesisWordCountASL() { + private static final Pattern WORD_SEPARATOR = Pattern.compile(" "); + private static final Logger logger = Logger.getLogger(JavaKinesisWordCountASL.class); + + public static void main(String[] args) { + // Check that all required args were passed in. + if (args.length != 3) { + System.err.println( + "Usage: JavaKinesisWordCountASL <stream-name> <endpoint-url>\n\n" + + " <app-name> is the name of the app, used to track the read data in DynamoDB\n" + + " <stream-name> is the name of the Kinesis stream\n" + + " <endpoint-url> is the endpoint of the Kinesis service\n" + + " (e.g. https://kinesis.us-east-1.amazonaws.com)\n" + + "Generate data for the Kinesis stream using the example KinesisWordProducerASL.\n" + + "See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more\n" + + "details.\n" + ); + System.exit(1); } - public static void main(String[] args) { - /* Check that all required args were passed in. */ - if (args.length < 2) { - System.err.println( - "Usage: JavaKinesisWordCountASL <stream-name> <endpoint-url>\n" + - " <stream-name> is the name of the Kinesis stream\n" + - " <endpoint-url> is the endpoint of the Kinesis service\n" + - " (e.g. https://kinesis.us-east-1.amazonaws.com)\n"); - System.exit(1); - } - - StreamingExamples.setStreamingLogLevels(); - - /* Populate the appropriate variables from the given args */ - String streamName = args[0]; - String endpointUrl = args[1]; - /* Set the batch interval to a fixed 2000 millis (2 seconds) */ - Duration batchInterval = new Duration(2000); - - /* Create a Kinesis client in order to determine the number of shards for the given stream */ - AmazonKinesisClient kinesisClient = new AmazonKinesisClient( - new DefaultAWSCredentialsProviderChain()); - kinesisClient.setEndpoint(endpointUrl); - - /* Determine the number of shards from the stream */ - int numShards = kinesisClient.describeStream(streamName) - .getStreamDescription().getShards().size(); - - /* In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard */ - int numStreams = numShards; - - /* Setup the Spark config. */ - SparkConf sparkConfig = new SparkConf().setAppName("KinesisWordCount"); - - /* Kinesis checkpoint interval. Same as batchInterval for this example. 
*/ - Duration checkpointInterval = batchInterval; + // Set default log4j logging level to WARN to hide Spark logs + StreamingExamples.setStreamingLogLevels(); + + // Populate the appropriate variables from the given args + String kinesisAppName = args[0]; + String streamName = args[1]; + String endpointUrl = args[2]; + + // Create a Kinesis client in order to determine the number of shards for the given stream + AmazonKinesisClient kinesisClient = + new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()); + kinesisClient.setEndpoint(endpointUrl); + int numShards = + kinesisClient.describeStream(streamName).getStreamDescription().getShards().size(); + + + // In this example, we're going to create 1 Kinesis Receiver/input DStream for each shard. + // This is not a necessity; if there are less receivers/DStreams than the number of shards, + // then the shards will be automatically distributed among the receivers and each receiver + // will receive data from multiple shards. + int numStreams = numShards; + + // Spark Streaming batch interval + Duration batchInterval = new Duration(2000); + + // Kinesis checkpoint interval. Same as batchInterval for this example. + Duration kinesisCheckpointInterval = batchInterval; + + // Get the region name from the endpoint URL to save Kinesis Client Library metadata in + // DynamoDB of the same region as the Kinesis stream + String regionName = RegionUtils.getRegionByEndpoint(endpointUrl).getName(); + + // Setup the Spark config and StreamingContext + SparkConf sparkConfig = new SparkConf().setAppName("JavaKinesisWordCountASL"); + JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval); + + // Create the Kinesis DStreams + List<JavaDStream<byte[]>> streamsList = new ArrayList<JavaDStream<byte[]>>(numStreams); + for (int i = 0; i < numStreams; i++) { + streamsList.add( + KinesisUtils.createStream(jssc, kinesisAppName, streamName, endpointUrl, regionName, + InitialPositionInStream.LATEST, kinesisCheckpointInterval, StorageLevel.MEMORY_AND_DISK_2()) + ); + } - /* Setup the StreamingContext */ - JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval); + // Union all the streams if there is more than 1 stream + JavaDStream<byte[]> unionStreams; + if (streamsList.size() > 1) { + unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size())); + } else { + // Otherwise, just use the 1 stream + unionStreams = streamsList.get(0); + } - /* Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */ - List<JavaDStream<byte[]>> streamsList = new ArrayList<JavaDStream<byte[]>>(numStreams); - for (int i = 0; i < numStreams; i++) { - streamsList.add( - KinesisUtils.createStream(jssc, streamName, endpointUrl, checkpointInterval, - InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2()) - ); + // Convert each line of Array[Byte] to String, and split into words + JavaDStream<String> words = unionStreams.flatMap(new FlatMapFunction<byte[], String>() { + @Override + public Iterable<String> call(byte[] line) { + return Lists.newArrayList(WORD_SEPARATOR.split(new String(line))); + } + }); + + // Map each word to a (word, 1) tuple so we can reduce by key to count the words + JavaPairDStream<String, Integer> wordCounts = words.mapToPair( + new PairFunction<String, String, Integer>() { + @Override + public Tuple2<String, Integer> call(String s) { + return new Tuple2<String, Integer>(s, 1); + } } - - /* Union all the streams if there is more than 1 stream */ - 
JavaDStream<byte[]> unionStreams; - if (streamsList.size() > 1) { - unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size())); - } else { - /* Otherwise, just use the 1 stream */ - unionStreams = streamsList.get(0); + ).reduceByKey( + new Function2<Integer, Integer, Integer>() { + @Override + public Integer call(Integer i1, Integer i2) { + return i1 + i2; + } } + ); - /* - * Split each line of the union'd DStreams into multiple words using flatMap to produce the collection. - * Convert lines of byte[] to multiple Strings by first converting to String, then splitting on WORD_SEPARATOR. - */ - JavaDStream<String> words = unionStreams.flatMap(new FlatMapFunction<byte[], String>() { - @Override - public Iterable<String> call(byte[] line) { - return Lists.newArrayList(WORD_SEPARATOR.split(new String(line))); - } - }); - - /* Map each word to a (word, 1) tuple, then reduce/aggregate by word. */ - JavaPairDStream<String, Integer> wordCounts = words.mapToPair( - new PairFunction<String, String, Integer>() { - @Override - public Tuple2<String, Integer> call(String s) { - return new Tuple2<String, Integer>(s, 1); - } - }).reduceByKey(new Function2<Integer, Integer, Integer>() { - @Override - public Integer call(Integer i1, Integer i2) { - return i1 + i2; - } - }); - - /* Print the first 10 wordCounts */ - wordCounts.print(); - - /* Start the streaming context and await termination */ - jssc.start(); - jssc.awaitTermination(); - } + // Print the first 10 wordCounts + wordCounts.print(); + + // Start the streaming context and await termination + jssc.start(); + jssc.awaitTermination(); + } } diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 32da0858d1a1d..640ca049e2ec4 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -18,213 +18,238 @@ package org.apache.spark.examples.streaming import java.nio.ByteBuffer + import scala.util.Random -import org.apache.spark.Logging -import org.apache.spark.SparkConf -import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.Milliseconds -import org.apache.spark.streaming.StreamingContext -import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions -import org.apache.spark.streaming.kinesis.KinesisUtils -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain + +import com.amazonaws.auth.{DefaultAWSCredentialsProviderChain, BasicAWSCredentials} +import com.amazonaws.regions.RegionUtils import com.amazonaws.services.kinesis.AmazonKinesisClient import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.model.PutRecordRequest -import org.apache.log4j.Logger -import org.apache.log4j.Level +import org.apache.log4j.{Level, Logger} + +import org.apache.spark.{Logging, SparkConf} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.{Milliseconds, StreamingContext} +import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions +import org.apache.spark.streaming.kinesis.KinesisUtils + /** - * Kinesis Spark Streaming WordCount example. + * Consumes messages from a Amazon Kinesis streams and does wordcount. 
* - * See http://spark.apache.org/docs/latest/streaming-kinesis.html for more details on - * the Kinesis Spark Streaming integration. + * This example spins up 1 Kinesis Receiver per shard for the given stream. + * It then starts pulling from the last checkpointed sequence number of the given stream. * - * This example spins up 1 Kinesis Worker (Spark Streaming Receiver) per shard - * for the given stream. - * It then starts pulling from the last checkpointed sequence number of the given - * <stream-name> and <endpoint-url>. + * Usage: KinesisWordCountASL <app-name> <stream-name> <endpoint-url> <region-name> + * <app-name> is the name of the consumer app, used to track the read data in DynamoDB + * <stream-name> name of the Kinesis stream (ie. mySparkStream) + * <endpoint-url> endpoint of the Kinesis service + * (e.g. https://kinesis.us-east-1.amazonaws.com) * - * Valid endpoint urls: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region - * - * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials - * in the following order of precedence: - * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY - * Java System Properties - aws.accessKeyId and aws.secretKey - * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs - * Instance profile credentials - delivered through the Amazon EC2 metadata service - * - * Usage: KinesisWordCountASL <stream-name> <endpoint-url> - * <stream-name> is the name of the Kinesis stream (ie. mySparkStream) - * <endpoint-url> is the endpoint of the Kinesis service - * (ie. https://kinesis.us-east-1.amazonaws.com) * * Example: - * $ export AWS_ACCESS_KEY_ID=<your-access-key> - * $ export AWS_SECRET_KEY=<your-secret-key> - * $ $SPARK_HOME/bin/run-example \ - * org.apache.spark.examples.streaming.KinesisWordCountASL mySparkStream \ - * https://kinesis.us-east-1.amazonaws.com + * # export AWS keys if necessary + * $ export AWS_ACCESS_KEY_ID=<your-access-key> + * $ export AWS_SECRET_KEY=<your-secret-key> + * + * # run the example + * $ SPARK_HOME/bin/run-example streaming.KinesisWordCountASL myAppName mySparkStream \ + * https://kinesis.us-east-1.amazonaws.com * - * - * Note that number of workers/threads should be 1 more than the number of receivers. - * This leaves one thread available for actually processing the data. + * There is a companion helper class called KinesisWordProducerASL which puts dummy data + * onto the Kinesis stream. * - * There is a companion helper class below called KinesisWordCountProducerASL which puts - * dummy data onto the Kinesis stream. - * Usage instructions for KinesisWordCountProducerASL are provided in that class definition. + * This code uses the DefaultAWSCredentialsProviderChain to find credentials + * in the following order: + * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY + * Java System Properties - aws.accessKeyId and aws.secretKey + * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs + * Instance profile credentials - delivered through the Amazon EC2 metadata service + * For more information, see + * http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html + * + * See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on + * the Kinesis Spark Streaming integration. 
*/ -private object KinesisWordCountASL extends Logging { +object KinesisWordCountASL extends Logging { def main(args: Array[String]) { - /* Check that all required args were passed in. */ - if (args.length < 2) { + // Check that all required args were passed in. + if (args.length != 3) { System.err.println( """ - |Usage: KinesisWordCount <stream-name> <endpoint-url> + |Usage: KinesisWordCountASL <app-name> <stream-name> <endpoint-url> <region-name> + | + | <app-name> is the name of the consumer app, used to track the read data in DynamoDB | <stream-name> is the name of the Kinesis stream | <endpoint-url> is the endpoint of the Kinesis service | (e.g. https://kinesis.us-east-1.amazonaws.com) + | + |Generate input data for Kinesis stream using the example KinesisWordProducerASL. + |See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more + |details. """.stripMargin) System.exit(1) } StreamingExamples.setStreamingLogLevels() - /* Populate the appropriate variables from the given args */ - val Array(streamName, endpointUrl) = args + // Populate the appropriate variables from the given args + val Array(appName, streamName, endpointUrl) = args - /* Determine the number of shards from the stream */ - val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) + + // Determine the number of shards from the stream using the low-level Kinesis Client + // from the AWS Java SDK. + val credentials = new DefaultAWSCredentialsProviderChain().getCredentials() + require(credentials != null, + "No AWS credentials found. Please specify credentials using one of the methods specified " + + "in http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html") + val kinesisClient = new AmazonKinesisClient(credentials) kinesisClient.setEndpoint(endpointUrl) - val numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards() - .size() + val numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards().size + - /* In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard. */ + // In this example, we're going to create 1 Kinesis Receiver/input DStream for each shard. + // This is not a necessity; if there are less receivers/DStreams than the number of shards, + // then the shards will be automatically distributed among the receivers and each receiver + // will receive data from multiple shards. val numStreams = numShards - /* Setup the and SparkConfig and StreamingContext */ - /* Spark Streaming batch interval */ + // Spark Streaming batch interval val batchInterval = Milliseconds(2000) - val sparkConfig = new SparkConf().setAppName("KinesisWordCount") - val ssc = new StreamingContext(sparkConfig, batchInterval) - /* Kinesis checkpoint interval. Same as batchInterval for this example. */ + // Kinesis checkpoint interval is the interval at which the DynamoDB is updated with information + //on sequence number of records that have been received. Same as batchInterval for this example. 
val kinesisCheckpointInterval = batchInterval - /* Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */ + // Get the region name from the endpoint URL to save Kinesis Client Library metadata in + // DynamoDB of the same region as the Kinesis stream + val regionName = RegionUtils.getRegionByEndpoint(endpointUrl).getName() + + // Setup the SparkConfig and StreamingContext + val sparkConfig = new SparkConf().setAppName("KinesisWordCountASL") + val ssc = new StreamingContext(sparkConfig, batchInterval) + + // Create the Kinesis DStreams val kinesisStreams = (0 until numStreams).map { i => - KinesisUtils.createStream(ssc, streamName, endpointUrl, kinesisCheckpointInterval, - InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2) + KinesisUtils.createStream(ssc, appName, streamName, endpointUrl, regionName, + InitialPositionInStream.LATEST, kinesisCheckpointInterval, StorageLevel.MEMORY_AND_DISK_2) } - /* Union all the streams */ + // Union all the streams val unionStreams = ssc.union(kinesisStreams) - /* Convert each line of Array[Byte] to String, split into words, and count them */ - val words = unionStreams.flatMap(byteArray => new String(byteArray) - .split(" ")) + // Convert each line of Array[Byte] to String, and split into words + val words = unionStreams.flatMap(byteArray => new String(byteArray).split(" ")) - /* Map each word to a (word, 1) tuple so we can reduce/aggregate by key. */ + // Map each word to a (word, 1) tuple so we can reduce by key to count the words val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _) - - /* Print the first 10 wordCounts */ + + // Print the first 10 wordCounts wordCounts.print() - /* Start the streaming context and await termination */ + // Start the streaming context and await termination ssc.start() ssc.awaitTermination() } } /** - * Usage: KinesisWordCountProducerASL <stream-name> <kinesis-endpoint-url> - * <recordsPerSec> <wordsPerRecord> + * Usage: KinesisWordProducerASL <stream-name> <endpoint-url> \ + * <records-per-sec> <words-per-record> + * * <stream-name> is the name of the Kinesis stream (ie. mySparkStream) - * <kinesis-endpoint-url> is the endpoint of the Kinesis service + * <endpoint-url> is the endpoint of the Kinesis service * (ie. https://kinesis.us-east-1.amazonaws.com) * <records-per-sec> is the rate of records per second to put onto the stream * <words-per-record> is the rate of records per second to put onto the stream * * Example: - * $ export AWS_ACCESS_KEY_ID=<your-access-key> - * $ export AWS_SECRET_KEY=<your-secret-key> - * $ $SPARK_HOME/bin/run-example \ - * org.apache.spark.examples.streaming.KinesisWordCountProducerASL mySparkStream \ - * https://kinesis.us-east-1.amazonaws.com 10 5 + * $ SPARK_HOME/bin/run-example streaming.KinesisWordProducerASL mySparkStream \ + * https://kinesis.us-east-1.amazonaws.com us-east-1 10 5 */ -private object KinesisWordCountProducerASL { +object KinesisWordProducerASL { def main(args: Array[String]) { - if (args.length < 4) { - System.err.println("Usage: KinesisWordCountProducerASL <stream-name> <endpoint-url>" + - " <records-per-sec> <words-per-record>") + if (args.length != 4) { + System.err.println( + """ + |Usage: KinesisWordProducerASL <stream-name> <endpoint-url> <records-per-sec> <words-per-record> + | + | <stream-name> is the name of the Kinesis stream + | <endpoint-url> is the endpoint of the Kinesis service + | (e.g. 
https://kinesis.us-east-1.amazonaws.com) + | <records-per-sec> is the rate of records per second to put onto the stream + | <words-per-record> is the rate of records per second to put onto the stream + | + """.stripMargin) + System.exit(1) } + // Set default log4j logging level to WARN to hide Spark logs StreamingExamples.setStreamingLogLevels() - /* Populate the appropriate variables from the given args */ + // Populate the appropriate variables from the given args val Array(stream, endpoint, recordsPerSecond, wordsPerRecord) = args - /* Generate the records and return the totals */ - val totals = generate(stream, endpoint, recordsPerSecond.toInt, wordsPerRecord.toInt) + // Generate the records and return the totals + val totals = generate(stream, endpoint, recordsPerSecond.toInt, + wordsPerRecord.toInt) - /* Print the array of (index, total) tuples */ - println("Totals") - totals.foreach(total => println(total.toString())) + // Print the array of (word, total) tuples + println("Totals for the words sent") + totals.foreach(println(_)) } def generate(stream: String, endpoint: String, recordsPerSecond: Int, - wordsPerRecord: Int): Seq[(Int, Int)] = { - - val MaxRandomInts = 10 + wordsPerRecord: Int): Seq[(String, Int)] = { - /* Create the Kinesis client */ + val randomWords = List("spark","you","are","my","father") + val totals = scala.collection.mutable.Map[String, Int]() + + // Create the low-level Kinesis Client from the AWS Java SDK. val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) kinesisClient.setEndpoint(endpoint) println(s"Putting records onto stream $stream and endpoint $endpoint at a rate of" + - s" $recordsPerSecond records per second and $wordsPerRecord words per record"); - - val totals = new Array[Int](MaxRandomInts) - /* Put String records onto the stream per the given recordPerSec and wordsPerRecord */ - for (i <- 1 to 5) { - - /* Generate recordsPerSec records to put onto the stream */ - val records = (1 to recordsPerSecond.toInt).map { recordNum => - /* - * Randomly generate each wordsPerRec words between 0 (inclusive) - * and MAX_RANDOM_INTS (exclusive) - */ + s" $recordsPerSecond records per second and $wordsPerRecord words per record") + + // Iterate and put records onto the stream per the given recordPerSec and wordsPerRecord + for (i <- 1 to 10) { + // Generate recordsPerSec records to put onto the stream + val records = (1 to recordsPerSecond.toInt).foreach { recordNum => + // Randomly generate wordsPerRecord number of words val data = (1 to wordsPerRecord.toInt).map(x => { - /* Generate the random int */ - val randomInt = Random.nextInt(MaxRandomInts) + // Get a random index to a word + val randomWordIdx = Random.nextInt(randomWords.size) + val randomWord = randomWords(randomWordIdx) - /* Keep track of the totals */ - totals(randomInt) += 1 + // Increment total count to compare to server counts later + totals(randomWord) = totals.getOrElse(randomWord, 0) + 1 - randomInt.toString() + randomWord }).mkString(" ") - /* Create a partitionKey based on recordNum */ + // Create a partitionKey based on recordNum val partitionKey = s"partitionKey-$recordNum" - /* Create a PutRecordRequest with an Array[Byte] version of the data */ + // Create a PutRecordRequest with an Array[Byte] version of the data val putRecordRequest = new PutRecordRequest().withStreamName(stream) .withPartitionKey(partitionKey) - .withData(ByteBuffer.wrap(data.getBytes())); + .withData(ByteBuffer.wrap(data.getBytes())) - /* Put the record onto the stream and capture 
the PutRecordResult */ - val putRecordResult = kinesisClient.putRecord(putRecordRequest); + // Put the record onto the stream and capture the PutRecordResult + val putRecordResult = kinesisClient.putRecord(putRecordRequest) } - /* Sleep for a second */ + // Sleep for a second Thread.sleep(1000) println("Sent " + recordsPerSecond + " records") } - - /* Convert the totals to (index, total) tuple */ - (0 to (MaxRandomInts - 1)).zip(totals) + // Convert the totals to (index, total) tuple + totals.toSeq.sortBy(_._1) } } @@ -233,8 +258,7 @@ private object KinesisWordCountProducerASL { * This has been lifted from the examples/ project to remove the circular dependency. */ private[streaming] object StreamingExamples extends Logging { - - /** Set reasonable logging levels for streaming if the user has not configured log4j. */ + // Set reasonable logging levels for streaming if the user has not configured log4j. def setStreamingLogLevels() { val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements if (!log4jInitialized) { From d03638cc2d414cee9ac7481084672e454495dfc1 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Mon, 18 May 2015 21:32:36 -0700 Subject: [PATCH 249/320] [SPARK-7681] [MLLIB] Add SparseVector support for gemv JIRA: https://issues.apache.org/jira/browse/SPARK-7681 Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #6209 from viirya/sparsevector_gemv and squashes the following commits: ce0bb8b [Liang-Chi Hsieh] Still need to scal y when beta is 0.0 because it clears out y. b890e63 [Liang-Chi Hsieh] Do not delete multiply for DenseVector. 57a8c1e [Liang-Chi Hsieh] Add MimaExcludes for v1.4. 458d1ae [Liang-Chi Hsieh] List DenseMatrix.multiply and SparseMatrix.multiply to MimaExcludes too. 054f05d [Liang-Chi Hsieh] Fix scala style. 410381a [Liang-Chi Hsieh] Address comments. Make Matrix.multiply more generalized. 4616696 [Liang-Chi Hsieh] Add support for SparseVector with SparseMatrix. 5d6d07a [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into sparsevector_gemv c069507 [Liang-Chi Hsieh] Add SparseVector support for gemv with DenseMatrix. --- .../org/apache/spark/mllib/linalg/BLAS.scala | 152 ++++++++++++++++-- .../apache/spark/mllib/linalg/Matrices.scala | 7 +- .../apache/spark/mllib/linalg/BLASSuite.scala | 96 +++++++++-- project/MimaExcludes.scala | 18 ++- 4 files changed, 240 insertions(+), 33 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index 87052e1ba8539..ec38529cf8fae 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -463,7 +463,7 @@ private[spark] object BLAS extends Serializable with Logging { def gemv( alpha: Double, A: Matrix, - x: DenseVector, + x: Vector, beta: Double, y: DenseVector): Unit = { require(A.numCols == x.size, @@ -473,44 +473,169 @@ private[spark] object BLAS extends Serializable with Logging { if (alpha == 0.0) { logDebug("gemv: alpha is equal to 0. 
Returning y.") } else { - A match { - case sparse: SparseMatrix => - gemv(alpha, sparse, x, beta, y) - case dense: DenseMatrix => - gemv(alpha, dense, x, beta, y) + (A, x) match { + case (smA: SparseMatrix, dvx: DenseVector) => + gemv(alpha, smA, dvx, beta, y) + case (smA: SparseMatrix, svx: SparseVector) => + gemv(alpha, smA, svx, beta, y) + case (dmA: DenseMatrix, dvx: DenseVector) => + gemv(alpha, dmA, dvx, beta, y) + case (dmA: DenseMatrix, svx: SparseVector) => + gemv(alpha, dmA, svx, beta, y) case _ => - throw new IllegalArgumentException(s"gemv doesn't support matrix type ${A.getClass}.") + throw new IllegalArgumentException(s"gemv doesn't support running on matrix type " + + s"${A.getClass} and vector type ${x.getClass}.") } } } /** * y := alpha * A * x + beta * y - * For `DenseMatrix` A. + * For `DenseMatrix` A and `DenseVector` x. */ private def gemv( alpha: Double, A: DenseMatrix, x: DenseVector, beta: Double, - y: DenseVector): Unit = { + y: DenseVector): Unit = { val tStrA = if (A.isTransposed) "T" else "N" val mA = if (!A.isTransposed) A.numRows else A.numCols val nA = if (!A.isTransposed) A.numCols else A.numRows nativeBLAS.dgemv(tStrA, mA, nA, alpha, A.values, mA, x.values, 1, beta, y.values, 1) } + + /** + * y := alpha * A * x + beta * y + * For `DenseMatrix` A and `SparseVector` x. + */ + private def gemv( + alpha: Double, + A: DenseMatrix, + x: SparseVector, + beta: Double, + y: DenseVector): Unit = { + val mA: Int = A.numRows + val nA: Int = A.numCols + + val Avals = A.values + + val xIndices = x.indices + val xNnz = xIndices.length + val xValues = x.values + val yValues = y.values + if (alpha == 0.0) { + scal(beta, y) + return + } + + if (A.isTransposed) { + var rowCounterForA = 0 + while (rowCounterForA < mA) { + var sum = 0.0 + var k = 0 + while (k < xNnz) { + sum += xValues(k) * Avals(xIndices(k) + rowCounterForA * nA) + k += 1 + } + yValues(rowCounterForA) = sum * alpha + beta * yValues(rowCounterForA) + rowCounterForA += 1 + } + } else { + var rowCounterForA = 0 + while (rowCounterForA < mA) { + var sum = 0.0 + var k = 0 + while (k < xNnz) { + sum += xValues(k) * Avals(xIndices(k) * mA + rowCounterForA) + k += 1 + } + yValues(rowCounterForA) = sum * alpha + beta * yValues(rowCounterForA) + rowCounterForA += 1 + } + } + } + /** * y := alpha * A * x + beta * y - * For `SparseMatrix` A. + * For `SparseMatrix` A and `SparseVector` x. 
+ */ + private def gemv( + alpha: Double, + A: SparseMatrix, + x: SparseVector, + beta: Double, + y: DenseVector): Unit = { + val xValues = x.values + val xIndices = x.indices + val xNnz = xIndices.length + + val yValues = y.values + + val mA: Int = A.numRows + val nA: Int = A.numCols + + val Avals = A.values + val Arows = if (!A.isTransposed) A.rowIndices else A.colPtrs + val Acols = if (!A.isTransposed) A.colPtrs else A.rowIndices + + if (alpha == 0.0) { + scal(beta, y) + return + } + + if (A.isTransposed) { + var rowCounter = 0 + while (rowCounter < mA) { + var i = Arows(rowCounter) + val indEnd = Arows(rowCounter + 1) + var sum = 0.0 + var k = 0 + while (k < xNnz && i < indEnd) { + if (xIndices(k) == Acols(i)) { + sum += Avals(i) * xValues(k) + i += 1 + } + k += 1 + } + yValues(rowCounter) = sum * alpha + beta * yValues(rowCounter) + rowCounter += 1 + } + } else { + scal(beta, y) + + var colCounterForA = 0 + var k = 0 + while (colCounterForA < nA && k < xNnz) { + if (xIndices(k) == colCounterForA) { + var i = Acols(colCounterForA) + val indEnd = Acols(colCounterForA + 1) + + val xTemp = xValues(k) * alpha + while (i < indEnd) { + val rowIndex = Arows(i) + yValues(Arows(i)) += Avals(i) * xTemp + i += 1 + } + k += 1 + } + colCounterForA += 1 + } + } + } + + /** + * y := alpha * A * x + beta * y + * For `SparseMatrix` A and `DenseVector` x. */ private def gemv( alpha: Double, A: SparseMatrix, x: DenseVector, beta: Double, - y: DenseVector): Unit = { + y: DenseVector): Unit = { val xValues = x.values val yValues = y.values val mA: Int = A.numRows @@ -534,10 +659,7 @@ private[spark] object BLAS extends Serializable with Logging { rowCounter += 1 } } else { - // Scale vector first if `beta` is not equal to 0.0 - if (beta != 0.0) { - scal(beta, y) - } + scal(beta, y) // Perform matrix-vector multiplication and add to y var colCounterForA = 0 while (colCounterForA < nA) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index a609674df6b8b..9584da8e3a0f9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -77,8 +77,13 @@ sealed trait Matrix extends Serializable { C } - /** Convenience method for `Matrix`-`DenseVector` multiplication. */ + /** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */ def multiply(y: DenseVector): DenseVector = { + multiply(y.asInstanceOf[Vector]) + } + + /** Convenience method for `Matrix`-`Vector` multiplication. 
*/ + def multiply(y: Vector): DenseVector = { val output = new DenseVector(new Array[Double](numRows)) BLAS.gemv(1.0, this, y, 0.0, output) output diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala index 002cb253862b5..64ecd12ea7ded 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala @@ -256,42 +256,108 @@ class BLASSuite extends FunSuite { val dA = new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0)) val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0)) - - val x = new DenseVector(Array(1.0, 2.0, 3.0)) + + val dA2 = + new DenseMatrix(4, 3, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0), true) + val sA2 = + new SparseMatrix(4, 3, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0), + true) + + val dx = new DenseVector(Array(1.0, 2.0, 3.0)) + val sx = dx.toSparse val expected = new DenseVector(Array(4.0, 1.0, 2.0, 9.0)) - assert(dA.multiply(x) ~== expected absTol 1e-15) - assert(sA.multiply(x) ~== expected absTol 1e-15) - + assert(dA.multiply(dx) ~== expected absTol 1e-15) + assert(sA.multiply(dx) ~== expected absTol 1e-15) + assert(dA.multiply(sx) ~== expected absTol 1e-15) + assert(sA.multiply(sx) ~== expected absTol 1e-15) + val y1 = new DenseVector(Array(1.0, 3.0, 1.0, 0.0)) val y2 = y1.copy val y3 = y1.copy val y4 = y1.copy + val y5 = y1.copy + val y6 = y1.copy + val y7 = y1.copy + val y8 = y1.copy + val y9 = y1.copy + val y10 = y1.copy + val y11 = y1.copy + val y12 = y1.copy + val y13 = y1.copy + val y14 = y1.copy + val y15 = y1.copy + val y16 = y1.copy + val expected2 = new DenseVector(Array(6.0, 7.0, 4.0, 9.0)) val expected3 = new DenseVector(Array(10.0, 8.0, 6.0, 18.0)) - gemv(1.0, dA, x, 2.0, y1) - gemv(1.0, sA, x, 2.0, y2) - gemv(2.0, dA, x, 2.0, y3) - gemv(2.0, sA, x, 2.0, y4) + gemv(1.0, dA, dx, 2.0, y1) + gemv(1.0, sA, dx, 2.0, y2) + gemv(1.0, dA, sx, 2.0, y3) + gemv(1.0, sA, sx, 2.0, y4) + + gemv(1.0, dA2, dx, 2.0, y5) + gemv(1.0, sA2, dx, 2.0, y6) + gemv(1.0, dA2, sx, 2.0, y7) + gemv(1.0, sA2, sx, 2.0, y8) + + gemv(2.0, dA, dx, 2.0, y9) + gemv(2.0, sA, dx, 2.0, y10) + gemv(2.0, dA, sx, 2.0, y11) + gemv(2.0, sA, sx, 2.0, y12) + + gemv(2.0, dA2, dx, 2.0, y13) + gemv(2.0, sA2, dx, 2.0, y14) + gemv(2.0, dA2, sx, 2.0, y15) + gemv(2.0, sA2, sx, 2.0, y16) + assert(y1 ~== expected2 absTol 1e-15) assert(y2 ~== expected2 absTol 1e-15) - assert(y3 ~== expected3 absTol 1e-15) - assert(y4 ~== expected3 absTol 1e-15) + assert(y3 ~== expected2 absTol 1e-15) + assert(y4 ~== expected2 absTol 1e-15) + + assert(y5 ~== expected2 absTol 1e-15) + assert(y6 ~== expected2 absTol 1e-15) + assert(y7 ~== expected2 absTol 1e-15) + assert(y8 ~== expected2 absTol 1e-15) + + assert(y9 ~== expected3 absTol 1e-15) + assert(y10 ~== expected3 absTol 1e-15) + assert(y11 ~== expected3 absTol 1e-15) + assert(y12 ~== expected3 absTol 1e-15) + + assert(y13 ~== expected3 absTol 1e-15) + assert(y14 ~== expected3 absTol 1e-15) + assert(y15 ~== expected3 absTol 1e-15) + assert(y16 ~== expected3 absTol 1e-15) + withClue("columns of A don't match the rows of B") { intercept[Exception] { - gemv(1.0, dA.transpose, x, 2.0, y1) + gemv(1.0, dA.transpose, dx, 2.0, y1) + } + intercept[Exception] { + gemv(1.0, sA.transpose, dx, 2.0, y1) + } + intercept[Exception] { + gemv(1.0, dA.transpose, sx, 2.0, y1) + } + 
intercept[Exception] { + gemv(1.0, sA.transpose, sx, 2.0, y1) } } + val dAT = new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0)) val sAT = new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) - + val dATT = dAT.transpose val sATT = sAT.transpose - assert(dATT.multiply(x) ~== expected absTol 1e-15) - assert(sATT.multiply(x) ~== expected absTol 1e-15) + assert(dATT.multiply(dx) ~== expected absTol 1e-15) + assert(sATT.multiply(dx) ~== expected absTol 1e-15) + assert(dATT.multiply(sx) ~== expected absTol 1e-15) + assert(sATT.multiply(sx) ~== expected absTol 1e-15) } } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 513bbaf98d804..f8d0160f6445e 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -87,7 +87,14 @@ object MimaExcludes { ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.Vector.toSparse"), ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Vector.numActives") + "org.apache.spark.mllib.linalg.Vector.numActives"), + // SPARK-7681 add SparseVector support for gemv + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.Matrix.multiply"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.DenseMatrix.multiply"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.SparseMatrix.multiply") ) ++ Seq( // Execution should never be included as its always internal. MimaBuild.excludeSparkPackage("sql.execution"), @@ -180,7 +187,14 @@ object MimaExcludes { ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.Matrix.isTransposed"), ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Matrix.foreachActive") + "org.apache.spark.mllib.linalg.Matrix.foreachActive"), + // SPARK-7681 add SparseVector support for gemv + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.Matrix.multiply"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.DenseMatrix.multiply"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.SparseMatrix.multiply") ) ++ Seq( // SPARK-5540 ProblemFilters.exclude[MissingMethodProblem]( From c2437de1899e09894df4ec27adfaa7fac158fd3a Mon Sep 17 00:00:00 2001 From: Daoyuan Wang <daoyuan.wang@intel.com> Date: Mon, 18 May 2015 21:43:12 -0700 Subject: [PATCH 250/320] [SPARK-7150] SparkContext.range() and SQLContext.range() This PR is based on #6081, thanks adrian-wang. 
Closes #6081 Author: Daoyuan Wang <daoyuan.wang@intel.com> Author: Davies Liu <davies@databricks.com> Closes #6230 from davies/range and squashes the following commits: d3ce5fe [Davies Liu] add tests 789eda5 [Davies Liu] add range() in Python 4590208 [Davies Liu] Merge commit 'refs/pull/6081/head' of github.com:apache/spark into range cbf5200 [Daoyuan Wang] let's add python support in a separate PR f45e3b2 [Daoyuan Wang] remove redundant toLong 617da76 [Daoyuan Wang] fix safe marge for corner cases 867c417 [Daoyuan Wang] fix 13dbe84 [Daoyuan Wang] update bd998ba [Daoyuan Wang] update comments d3a0c1b [Daoyuan Wang] add range api() --- .../scala/org/apache/spark/SparkContext.scala | 72 +++++++++++++++++++ python/pyspark/context.py | 16 +++++ python/pyspark/sql/context.py | 20 ++++++ python/pyspark/sql/tests.py | 5 ++ python/pyspark/tests.py | 5 ++ .../org/apache/spark/sql/SQLContext.scala | 31 ++++++++ .../org/apache/spark/sql/DataFrameSuite.scala | 40 +++++++++++ 7 files changed, 189 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index f78fbaf33f656..3fe3dc5e300e8 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -697,6 +697,78 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli new ParallelCollectionRDD[T](this, seq, numSlices, Map[Int, Seq[String]]()) } + /** + * Creates a new RDD[Long] containing elements from `start` to `end`(exclusive), increased by + * `step` every element. + * + * @note if we need to cache this RDD, we should make sure each partition does not exceed limit. + * + * @param start the start value. + * @param end the end value. + * @param step the incremental step + * @param numSlices the partition number of the new RDD. + * @return + */ + def range( + start: Long, + end: Long, + step: Long = 1, + numSlices: Int = defaultParallelism): RDD[Long] = withScope { + assertNotStopped() + // when step is 0, range will run infinitely + require(step != 0, "step cannot be 0") + val numElements: BigInt = { + val safeStart = BigInt(start) + val safeEnd = BigInt(end) + if ((safeEnd - safeStart) % step == 0 || safeEnd > safeStart ^ step > 0) { + (safeEnd - safeStart) / step + } else { + // the remainder has the same sign with range, could add 1 more + (safeEnd - safeStart) / step + 1 + } + } + parallelize(0 until numSlices, numSlices).mapPartitionsWithIndex((i, _) => { + val partitionStart = (i * numElements) / numSlices * step + start + val partitionEnd = (((i + 1) * numElements) / numSlices) * step + start + def getSafeMargin(bi: BigInt): Long = + if (bi.isValidLong) { + bi.toLong + } else if (bi > 0) { + Long.MaxValue + } else { + Long.MinValue + } + val safePartitionStart = getSafeMargin(partitionStart) + val safePartitionEnd = getSafeMargin(partitionEnd) + + new Iterator[Long] { + private[this] var number: Long = safePartitionStart + private[this] var overflow: Boolean = false + + override def hasNext = + if (!overflow) { + if (step > 0) { + number < safePartitionEnd + } else { + number > safePartitionEnd + } + } else false + + override def next() = { + val ret = number + number += step + if (number < ret ^ step < 0) { + // we have Long.MaxValue + Long.MaxValue < Long.MaxValue + // and Long.MinValue + Long.MinValue > Long.MinValue, so iff the step causes a step + // back, we are pretty sure that we have an overflow. 
+ overflow = true + } + ret + } + } + }) + } + /** Distribute a local Scala collection to form an RDD. * * This method is identical to `parallelize`. diff --git a/python/pyspark/context.py b/python/pyspark/context.py index d25ee855235be..1f2b40b29fafa 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -319,6 +319,22 @@ def stop(self): with SparkContext._lock: SparkContext._active_spark_context = None + def range(self, start, end, step=1, numSlices=None): + """ + Create a new RDD of int containing elements from `start` to `end` + (exclusive), increased by `step` every element. + + :param start: the start value + :param end: the end value (exclusive) + :param step: the incremental step (default: 1) + :param numSlices: the number of partitions of the new RDD + :return: An RDD of int + + >>> sc.range(1, 7, 2).collect() + [1, 3, 5] + """ + return self.parallelize(xrange(start, end, step), numSlices) + def parallelize(self, c, numSlices=None): """ Distribute a local Python collection to form an RDD. Using xrange diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 0bde7191242ab..9f26d13235d5f 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -122,6 +122,26 @@ def udf(self): """Returns a :class:`UDFRegistration` for UDF registration.""" return UDFRegistration(self) + def range(self, start, end, step=1, numPartitions=None): + """ + Create a :class:`DataFrame` with single LongType column named `id`, + containing elements in a range from `start` to `end` (exclusive) with + step value `step`. + + :param start: the start value + :param end: the end value (exclusive) + :param step: the incremental step (default: 1) + :param numPartitions: the number of partitions of the DataFrame + :return: A new DataFrame + + >>> sqlContext.range(1, 7, 2).collect() + [Row(id=1), Row(id=3), Row(id=5)] + """ + if numPartitions is None: + numPartitions = self._sc.defaultParallelism + jdf = self._ssql_ctx.range(int(start), int(end), int(step), int(numPartitions)) + return DataFrame(jdf, self) + @ignore_unicode_prefix def registerFunction(self, name, f, returnType=StringType()): """Registers a lambda function as a UDF so it can be used in SQL statements. 
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index d37c5dbed7f6b..84ae36f2fd026 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -117,6 +117,11 @@ def tearDownClass(cls): ReusedPySparkTestCase.tearDownClass() shutil.rmtree(cls.tempdir.name, ignore_errors=True) + def test_range(self): + self.assertEqual(self.sqlCtx.range(1, 1).count(), 0) + self.assertEqual(self.sqlCtx.range(1, 0, -1).count(), 1) + self.assertEqual(self.sqlCtx.range(0, 1 << 40, 1 << 39).count(), 2) + def test_explode(self): from pyspark.sql.functions import explode d = [Row(a=1, intlist=[1, 2, 3], mapfield={"a": "b"})] diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 5e023f6c53517..d8e319994cc96 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -444,6 +444,11 @@ def func(x): class RDDTests(ReusedPySparkTestCase): + def test_range(self): + self.assertEqual(self.sc.range(1, 1).count(), 0) + self.assertEqual(self.sc.range(1, 0, -1).count(), 1) + self.assertEqual(self.sc.range(0, 1 << 40, 1 << 39).count(), 2) + def test_id(self): rdd = self.sc.parallelize(range(10)) id = rdd.id() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index ac1a800219423..316ef7d58809d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -684,6 +684,37 @@ class SQLContext(@transient val sparkContext: SparkContext) catalog.unregisterTable(Seq(tableName)) } + /** + * :: Experimental :: + * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements + * in an range from `start` to `end`(exclusive) with step value 1. + * + * @since 1.4.0 + * @group dataframe + */ + @Experimental + def range(start: Long, end: Long): DataFrame = { + createDataFrame( + sparkContext.range(start, end).map(Row(_)), + StructType(StructField("id", LongType, nullable = false) :: Nil)) + } + + /** + * :: Experimental :: + * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements + * in an range from `start` to `end`(exclusive) with an step value, with partition number + * specified. + * + * @since 1.4.0 + * @group dataframe + */ + @Experimental + def range(start: Long, end: Long, step: Long, numPartitions: Int): DataFrame = { + createDataFrame( + sparkContext.range(start, end, step, numPartitions).map(Row(_)), + StructType(StructField("id", LongType, nullable = false) :: Nil)) + } + /** * Executes a SQL query using Spark, returning the result as a [[DataFrame]]. The dialect that is * used for SQL parsing can be configured with 'spark.sql.dialect'. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 054b23dba84c5..f05d059d443c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -532,4 +532,44 @@ class DataFrameSuite extends QueryTest { val p = df.logicalPlan.asInstanceOf[Project].child.asInstanceOf[Project] assert(!p.child.isInstanceOf[Project]) } + + test("SPARK-7150 range api") { + // numSlice is greater than length + val res1 = TestSQLContext.range(0, 10, 1, 15).select("id") + assert(res1.count == 10) + assert(res1.agg(sum("id")).as("sumid").collect() === Seq(Row(45))) + + val res2 = TestSQLContext.range(3, 15, 3, 2).select("id") + assert(res2.count == 4) + assert(res2.agg(sum("id")).as("sumid").collect() === Seq(Row(30))) + + val res3 = TestSQLContext.range(1, -2).select("id") + assert(res3.count == 0) + + // start is positive, end is negative, step is negative + val res4 = TestSQLContext.range(1, -2, -2, 6).select("id") + assert(res4.count == 2) + assert(res4.agg(sum("id")).as("sumid").collect() === Seq(Row(0))) + + // start, end, step are negative + val res5 = TestSQLContext.range(-3, -8, -2, 1).select("id") + assert(res5.count == 3) + assert(res5.agg(sum("id")).as("sumid").collect() === Seq(Row(-15))) + + // start, end are negative, step is positive + val res6 = TestSQLContext.range(-8, -4, 2, 1).select("id") + assert(res6.count == 2) + assert(res6.agg(sum("id")).as("sumid").collect() === Seq(Row(-14))) + + val res7 = TestSQLContext.range(-10, -9, -20, 1).select("id") + assert(res7.count == 0) + + val res8 = TestSQLContext.range(Long.MinValue, Long.MaxValue, Long.MaxValue, 100).select("id") + assert(res8.count == 3) + assert(res8.agg(sum("id")).as("sumid").collect() === Seq(Row(-3))) + + val res9 = TestSQLContext.range(Long.MaxValue, Long.MinValue, Long.MinValue, 100).select("id") + assert(res9.count == 2) + assert(res9.agg(sum("id")).as("sumid").collect() === Seq(Row(Long.MaxValue - 1))) + } } From c9fa870a6de3f7d0903fa7a75ea5ffb6a2fcd174 Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Mon, 18 May 2015 21:53:44 -0700 Subject: [PATCH 251/320] [SPARK-7687] [SQL] DataFrame.describe() should cast all aggregates to String In `DataFrame.describe()`, the `count` aggregate produces an integer, the `avg` and `stdev` aggregates produce doubles, and `min` and `max` aggregates can produce varying types depending on what type of column they're applied to. As a result, we should cast all aggregate results to String so that `describe()`'s output types match its declared output schema. Author: Josh Rosen <joshrosen@databricks.com> Closes #6218 from JoshRosen/SPARK-7687 and squashes the following commits: 146b615 [Josh Rosen] Fix R test. 
2974bd5 [Josh Rosen] Cast to string type instead f206580 [Josh Rosen] Cast to double to fix SPARK-7687 307ecbf [Josh Rosen] Add failing regression test for SPARK-7687 --- R/pkg/inst/tests/test_sparkSQL.R | 10 +++++----- .../scala/org/apache/spark/sql/DataFrame.scala | 6 +++--- .../org/apache/spark/sql/DataFrameSuite.scala | 17 +++++++++++------ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 3e5658eb5b24b..1768c57fd02e4 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -757,12 +757,12 @@ test_that("parquetFile works with multiple input paths", { test_that("describe() on a DataFrame", { df <- jsonFile(sqlCtx, jsonPath) stats <- describe(df, "age") - expect_true(collect(stats)[1, "summary"] == "count") - expect_true(collect(stats)[2, "age"] == 24.5) - expect_true(collect(stats)[3, "age"] == 5.5) + expect_equal(collect(stats)[1, "summary"], "count") + expect_equal(collect(stats)[2, "age"], "24.5") + expect_equal(collect(stats)[3, "age"], "5.5") stats <- describe(df) - expect_true(collect(stats)[4, "name"] == "Andy") - expect_true(collect(stats)[5, "age"] == 30.0) + expect_equal(collect(stats)[4, "name"], "Andy") + expect_equal(collect(stats)[5, "age"], "30") }) unlink(parquetPath) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 27e9af49f0664..adad85806d1ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -1063,7 +1063,7 @@ class DataFrame private[sql]( val ret: Seq[Row] = if (outputCols.nonEmpty) { val aggExprs = statistics.flatMap { case (_, colToAgg) => - outputCols.map(c => Column(colToAgg(Column(c).expr)).as(c)) + outputCols.map(c => Column(Cast(colToAgg(Column(c).expr), StringType)).as(c)) } val row = agg(aggExprs.head, aggExprs.tail: _*).head().toSeq @@ -1077,9 +1077,9 @@ class DataFrame private[sql]( statistics.map { case (name, _) => Row(name) } } - // The first column is string type, and the rest are double type. 
+ // All columns are string type val schema = StructType( - StructField("summary", StringType) :: outputCols.map(StructField(_, DoubleType))).toAttributes + StructField("summary", StringType) :: outputCols.map(StructField(_, StringType))).toAttributes LocalRelation(schema, ret) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index f05d059d443c4..0dcba80ef2a20 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -370,14 +370,14 @@ class DataFrameSuite extends QueryTest { ("Amy", 24, 180)).toDF("name", "age", "height") val describeResult = Seq( - Row("count", 4, 4), - Row("mean", 33.0, 178.0), - Row("stddev", 16.583123951777, 10.0), - Row("min", 16, 164), - Row("max", 60, 192)) + Row("count", "4", "4"), + Row("mean", "33.0", "178.0"), + Row("stddev", "16.583123951777", "10.0"), + Row("min", "16", "164"), + Row("max", "60", "192")) val emptyDescribeResult = Seq( - Row("count", 0, 0), + Row("count", "0", "0"), Row("mean", null, null), Row("stddev", null, null), Row("min", null, null), @@ -388,6 +388,11 @@ class DataFrameSuite extends QueryTest { val describeTwoCols = describeTestData.describe("age", "height") assert(getSchemaAsSeq(describeTwoCols) === Seq("summary", "age", "height")) checkAnswer(describeTwoCols, describeResult) + // All aggregate value should have been cast to string + describeTwoCols.collect().foreach { row => + assert(row.get(1).isInstanceOf[String], "expected string but found " + row.get(1).getClass) + assert(row.get(2).isInstanceOf[String], "expected string but found " + row.get(2).getClass) + } val describeAllCols = describeTestData.describe() assert(getSchemaAsSeq(describeAllCols) === Seq("summary", "age", "height")) From 9ebb44f8abb1a13f045eed60190954db904ffef7 Mon Sep 17 00:00:00 2001 From: Patrick Wendell <patrick@databricks.com> Date: Tue, 19 May 2015 06:00:13 +0000 Subject: [PATCH 252/320] [HOTFIX]: Java 6 Build Breaks These were blocking RC1 so I fixed them manually. 
--- .../shuffle/unsafe/UnsafeShuffleWriterSuite.java | 14 -------------- .../apache/spark/sql/hive/orc/OrcRelation.scala | 3 ++- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java index 730d265c87f88..03116d8fc2b21 100644 --- a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java @@ -252,20 +252,6 @@ public void doNotNeedToCallWriteBeforeUnsuccessfulStop() throws IOException { createWriter(false).stop(false); } - @Test - public void writeEmptyIterator() throws Exception { - final UnsafeShuffleWriter<Object, Object> writer = createWriter(true); - writer.write(Collections.<Product2<Object, Object>>emptyIterator()); - final Option<MapStatus> mapStatus = writer.stop(true); - assertTrue(mapStatus.isDefined()); - assertTrue(mergedOutputFile.exists()); - assertArrayEquals(new long[NUM_PARTITITONS], partitionSizesInMergedFile); - assertEquals(0, taskMetrics.shuffleWriteMetrics().get().shuffleRecordsWritten()); - assertEquals(0, taskMetrics.shuffleWriteMetrics().get().shuffleBytesWritten()); - assertEquals(0, taskMetrics.diskBytesSpilled()); - assertEquals(0, taskMetrics.memoryBytesSpilled()); - } - @Test public void writeWithoutSpilling() throws Exception { // In this example, each partition should have exactly one record: diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala index 58b97adb46165..b69e14a179d0a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala @@ -17,8 +17,9 @@ package org.apache.spark.sql.hive.orc -import java.util.{Objects, Properties} +import java.util.Properties +import com.google.common.base.Objects import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.hive.conf.HiveConf.ConfVars From 23cf897112624ece19a3b5e5394cdf71b9c3c8b3 Mon Sep 17 00:00:00 2001 From: Patrick Wendell <patrick@databricks.com> Date: Tue, 19 May 2015 00:02:06 -0700 Subject: [PATCH 253/320] [HOTFIX] Fixing style failures in Kinesis source --- .../spark/examples/streaming/KinesisWordCountASL.scala | 6 ++++-- .../apache/spark/streaming/kinesis/KinesisReceiver.scala | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 640ca049e2ec4..df77f4be9db1d 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -119,7 +119,8 @@ object KinesisWordCountASL extends Logging { val batchInterval = Milliseconds(2000) // Kinesis checkpoint interval is the interval at which the DynamoDB is updated with information - //on sequence number of records that have been received. Same as batchInterval for this example. + // on sequence number of records that have been received. Same as batchInterval for this + // example. 
val kinesisCheckpointInterval = batchInterval // Get the region name from the endpoint URL to save Kinesis Client Library metadata in @@ -173,7 +174,8 @@ object KinesisWordProducerASL { if (args.length != 4) { System.err.println( """ - |Usage: KinesisWordProducerASL <stream-name> <endpoint-url> <records-per-sec> <words-per-record> + |Usage: KinesisWordProducerASL <stream-name> <endpoint-url> <records-per-sec> + <words-per-record> | | <stream-name> is the name of the Kinesis stream | <endpoint-url> is the endpoint of the Kinesis service diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala index 01608fbd3fd31..90164490efb2e 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala @@ -82,8 +82,8 @@ private[kinesis] class KinesisReceiver( */ /** - * workerId is used by the KCL should be based on the ip address of the actual Spark Worker where this code runs - * (not the driver's IP address.) + * workerId is used by the KCL should be based on the ip address of the actual Spark Worker + * where this code runs (not the driver's IP address.) */ private var workerId: String = null From 6008ec14ed6491d0a854bb50548c46f2f9709269 Mon Sep 17 00:00:00 2001 From: Xusen Yin <yinxusen@gmail.com> Date: Tue, 19 May 2015 00:06:33 -0700 Subject: [PATCH 254/320] [SPARK-7581] [ML] [DOC] User guide for spark.ml PolynomialExpansion JIRA [here](https://issues.apache.org/jira/browse/SPARK-7581). CC jkbradley Author: Xusen Yin <yinxusen@gmail.com> Closes #6113 from yinxusen/SPARK-7581 and squashes the following commits: 1a7d80d [Xusen Yin] merge with master 892a8e9 [Xusen Yin] fix python 3 compatibility ec935bf [Xusen Yin] small fix 3e9fa1d [Xusen Yin] delete note 69fcf85 [Xusen Yin] simplify and add python example 81d21dc [Xusen Yin] add programming guide for Polynomial Expansion 40babfb [Xusen Yin] add java test suite for PolynomialExpansion --- docs/ml-features.md | 83 +++++++++++++++++ .../feature/JavaPolynomialExpansionSuite.java | 91 +++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java diff --git a/docs/ml-features.md b/docs/ml-features.md index 5df61dd36a070..e86f9edc4f68b 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -268,5 +268,88 @@ for binarized_feature, in binarizedFeatures.collect(): </div> </div> +## PolynomialExpansion + +[Polynomial expansion](http://en.wikipedia.org/wiki/Polynomial_expansion) is the process of expanding your features into a polynomial space, which is formulated by an n-degree combination of original dimensions. A [PolynomialExpansion](api/scala/index.html#org.apache.spark.ml.feature.PolynomialExpansion) class provides this functionality. The example below shows how to expand your features into a 3-degree polynomial space. 
+ +<div class="codetabs"> +<div data-lang="scala" markdown="1"> +{% highlight scala %} +import org.apache.spark.ml.feature.PolynomialExpansion +import org.apache.spark.mllib.linalg.Vectors + +val data = Array( + Vectors.dense(-2.0, 2.3), + Vectors.dense(0.0, 0.0), + Vectors.dense(0.6, -1.1) +) +val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") +val polynomialExpansion = new PolynomialExpansion() + .setInputCol("features") + .setOutputCol("polyFeatures") + .setDegree(3) +val polyDF = polynomialExpansion.transform(df) +polyDF.select("polyFeatures").take(3).foreach(println) +{% endhighlight %} +</div> + +<div data-lang="java" markdown="1"> +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +JavaSparkContext jsc = ... +SQLContext jsql = ... +PolynomialExpansion polyExpansion = new PolynomialExpansion() + .setInputCol("features") + .setOutputCol("polyFeatures") + .setDegree(3); +JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList( + RowFactory.create(Vectors.dense(-2.0, 2.3)), + RowFactory.create(Vectors.dense(0.0, 0.0)), + RowFactory.create(Vectors.dense(0.6, -1.1)) +)); +StructType schema = new StructType(new StructField[] { + new StructField("features", new VectorUDT(), false, Metadata.empty()), +}); +DataFrame df = jsql.createDataFrame(data, schema); +DataFrame polyDF = polyExpansion.transform(df); +Row[] row = polyDF.select("polyFeatures").take(3); +for (Row r : row) { + System.out.println(r.get(0)); +} +{% endhighlight %} +</div> + +<div data-lang="python" markdown="1"> +{% highlight python %} +from pyspark.ml.feature import PolynomialExpansion +from pyspark.mllib.linalg import Vectors + +df = sqlContext.createDataFrame( + [(Vectors.dense([-2.0, 2.3]), ), + (Vectors.dense([0.0, 0.0]), ), + (Vectors.dense([0.6, -1.1]), )], + ["features"]) +px = PolynomialExpansion(degree=2, inputCol="features", outputCol="polyFeatures") +polyDF = px.transform(df) +for expanded in polyDF.select("polyFeatures").take(3): + print(expanded) +{% endhighlight %} +</div> +</div> + # Feature Selectors diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java new file mode 100644 index 0000000000000..5e8211c2c5118 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaPolynomialExpansionSuite { + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaPolynomialExpansionSuite"); + jsql = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void polynomialExpansionTest() { + PolynomialExpansion polyExpansion = new PolynomialExpansion() + .setInputCol("features") + .setOutputCol("polyFeatures") + .setDegree(3); + + JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList( + RowFactory.create( + Vectors.dense(-2.0, 2.3), + Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17) + ), + RowFactory.create(Vectors.dense(0.0, 0.0), Vectors.dense(new double[9])), + RowFactory.create( + Vectors.dense(0.6, -1.1), + Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331) + ) + )); + + StructType schema = new StructType(new StructField[] { + new StructField("features", new VectorUDT(), false, Metadata.empty()), + new StructField("expected", new VectorUDT(), false, Metadata.empty()) + }); + + DataFrame dataset = jsql.createDataFrame(data, schema); + + Row[] pairs = polyExpansion.transform(dataset) + .select("polyFeatures", "expected") + .collect(); + + for (Row r : pairs) { + double[] polyFeatures = ((Vector)r.get(0)).toArray(); + double[] expected = ((Vector)r.get(1)).toArray(); + Assert.assertArrayEquals(polyFeatures, expected, 1e-1); + } + } +} From 61f164d3fdd1c8dcdba8c9d66df05ff4069aa6e6 Mon Sep 17 00:00:00 2001 From: Mike Dusenberry <dusenberrymw@gmail.com> Date: Tue, 19 May 2015 08:59:45 +0100 Subject: [PATCH 255/320] Fixing a few basic typos in the Programming Guide. Just a few minor fixes in the guide, so a new JIRA issue was not created per the guidelines. Author: Mike Dusenberry <dusenberrymw@gmail.com> Closes #6240 from dusenberrymw/Fix_Programming_Guide_Typos and squashes the following commits: ffa76eb [Mike Dusenberry] Fixing a few basic typos in the Programming Guide. --- docs/programming-guide.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/programming-guide.md b/docs/programming-guide.md index 27816515c5de2..0c273769bb14b 100644 --- a/docs/programming-guide.md +++ b/docs/programming-guide.md @@ -1071,7 +1071,7 @@ for details. 
</tr> <tr> <td> <b>saveAsSequenceFile</b>(<i>path</i>) <br /> (Java and Scala) </td> - <td> Write the elements of the dataset as a Hadoop SequenceFile in a given path in the local filesystem, HDFS or any other Hadoop-supported file system. This is available on RDDs of key-value pairs that either implement Hadoop's Writable interface. In Scala, it is also + <td> Write the elements of the dataset as a Hadoop SequenceFile in a given path in the local filesystem, HDFS or any other Hadoop-supported file system. This is available on RDDs of key-value pairs that implement Hadoop's Writable interface. In Scala, it is also available on types that are implicitly convertible to Writable (Spark includes conversions for basic types like Int, Double, String, etc). </td> </tr> <tr> @@ -1122,7 +1122,7 @@ ordered data following shuffle then it's possible to use: * `sortBy` to make a globally ordered RDD Operations which can cause a shuffle include **repartition** operations like -[`repartition`](#RepartitionLink), and [`coalesce`](#CoalesceLink), **'ByKey** operations +[`repartition`](#RepartitionLink) and [`coalesce`](#CoalesceLink), **'ByKey** operations (except for counting) like [`groupByKey`](#GroupByLink) and [`reduceByKey`](#ReduceByLink), and **join** operations like [`cogroup`](#CogroupLink) and [`join`](#JoinLink). @@ -1138,7 +1138,7 @@ read the relevant sorted blocks. Certain shuffle operations can consume significant amounts of heap memory since they employ in-memory data structures to organize records before or after transferring them. Specifically, -`reduceByKey` and `aggregateByKey` create these structures on the map side and `'ByKey` operations +`reduceByKey` and `aggregateByKey` create these structures on the map side, and `'ByKey` operations generate these on the reduce side. When data does not fit in memory Spark will spill these tables to disk, incurring the additional overhead of disk I/O and increased garbage collection. From 27fa88b9ba320cd0d95703aa3437151ba7c86f98 Mon Sep 17 00:00:00 2001 From: Patrick Wendell <patrick@databricks.com> Date: Tue, 19 May 2015 02:28:41 -0700 Subject: [PATCH 256/320] [HOTFIX] Revert "[SPARK-7092] Update spark scala version to 2.11.6" This reverts commit a11c8683c76c67f45749a1b50a0912a731fd2487. 
For more information see: https://issues.apache.org/jira/browse/SPARK-7726 --- pom.xml | 4 ++-- .../src/main/scala/org/apache/spark/repl/SparkIMain.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index c72d7cbf843ef..d903f02c1aed0 100644 --- a/pom.xml +++ b/pom.xml @@ -1799,9 +1799,9 @@ <property><name>scala-2.11</name></property> </activation> <properties> - <scala.version>2.11.6</scala.version> + <scala.version>2.11.2</scala.version> <scala.binary.version>2.11</scala.binary.version> - <jline.version>2.12.1</jline.version> + <jline.version>2.12</jline.version> <jline.groupid>jline</jline.groupid> </properties> </profile> diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkIMain.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkIMain.scala index 1cb910f376060..1bb62c84abddc 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkIMain.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkIMain.scala @@ -1129,7 +1129,7 @@ class SparkIMain(@BeanProperty val factory: ScriptEngineFactory, initialSettings def apply(line: String): Result = debugging(s"""parse("$line")""") { var isIncomplete = false - currentRun.parsing.withIncompleteHandler((_, _) => isIncomplete = true) { + currentRun.reporting.withIncompleteHandler((_, _) => isIncomplete = true) { reporter.reset() val trees = newUnitParser(line).parseStats() if (reporter.hasErrors) Error From df34793ad4e76214fc4c0a22af1eb89b171a32e4 Mon Sep 17 00:00:00 2001 From: Saleem Ansari <tuxdna@gmail.com> Date: Tue, 19 May 2015 10:31:11 +0100 Subject: [PATCH 257/320] [SPARK-7723] Fix string interpolation in pipeline examples https://issues.apache.org/jira/browse/SPARK-7723 Author: Saleem Ansari <tuxdna@gmail.com> Closes #6258 from tuxdna/master and squashes the following commits: 2bb5a42 [Saleem Ansari] Merge branch 'master' into mllib-pipeline e39db9c [Saleem Ansari] Fix string interpolation in pipeline examples --- docs/ml-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ml-guide.md b/docs/ml-guide.md index b7b6376e061f7..cac705683c8bc 100644 --- a/docs/ml-guide.md +++ b/docs/ml-guide.md @@ -237,7 +237,7 @@ model2.transform(test.toDF) .select("features", "label", "myProbability", "prediction") .collect() .foreach { case Row(features: Vector, label: Double, prob: Vector, prediction: Double) => - println("($features, $label) -> prob=$prob, prediction=$prediction") + println(s"($features, $label) -> prob=$prob, prediction=$prediction") } sc.stop() @@ -391,7 +391,7 @@ model.transform(test.toDF) .select("id", "text", "probability", "prediction") .collect() .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) => - println("($id, $text) --> prob=$prob, prediction=$prediction") + println(s"($id, $text) --> prob=$prob, prediction=$prediction") } sc.stop() From 6845cb2ff475fd794b30b01af5ebc80714b880f0 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Tue, 19 May 2015 08:24:57 -0700 Subject: [PATCH 258/320] [SPARK-7681] [MLLIB] remove mima excludes for 1.3 There excludes are unnecessary for 1.3 because the changes were made in 1.4.x. 
Author: Xiangrui Meng <meng@databricks.com> Closes #6254 from mengxr/SPARK-7681-mima and squashes the following commits: 7f0cea0 [Xiangrui Meng] remove mima excludes for 1.3 --- project/MimaExcludes.scala | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index f8d0160f6445e..03e93a2f98f9b 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -187,14 +187,7 @@ object MimaExcludes { ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.Matrix.isTransposed"), ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Matrix.foreachActive"), - // SPARK-7681 add SparseVector support for gemv - ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Matrix.multiply"), - ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.DenseMatrix.multiply"), - ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.SparseMatrix.multiply") + "org.apache.spark.mllib.linalg.Matrix.foreachActive") ) ++ Seq( // SPARK-5540 ProblemFilters.exclude[MissingMethodProblem]( From 32fa611b19c6b95d4563be631c5a8ff0cdf3438f Mon Sep 17 00:00:00 2001 From: Dice <poleon.kd@gmail.com> Date: Tue, 19 May 2015 18:12:05 +0100 Subject: [PATCH 259/320] [SPARK-7704] Updating Programming Guides per SPARK-4397 The change per SPARK-4397 makes implicit objects in SparkContext to be found by the compiler automatically. So that we don't need to import the o.a.s.SparkContext._ explicitly any more and can remove some statements around the "implicit conversions" from the latest Programming Guides (1.3.0 and higher) Author: Dice <poleon.kd@gmail.com> Closes #6234 from daisukebe/patch-1 and squashes the following commits: b77ecd9 [Dice] fix a typo 45dfcd3 [Dice] rewording per Sean's advice a094bcf [Dice] Adding a note for users on any previous releases a29be5f [Dice] Updating Programming Guides per SPARK-4397 --- docs/programming-guide.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/programming-guide.md b/docs/programming-guide.md index 0c273769bb14b..07a4d29fe7104 100644 --- a/docs/programming-guide.md +++ b/docs/programming-guide.md @@ -41,14 +41,15 @@ In addition, if you wish to access an HDFS cluster, you need to add a dependency artifactId = hadoop-client version = <your-hdfs-version> -Finally, you need to import some Spark classes and implicit conversions into your program. Add the following lines: +Finally, you need to import some Spark classes into your program. Add the following lines: {% highlight scala %} import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ import org.apache.spark.SparkConf {% endhighlight %} +(Before Spark 1.3.0, you need to explicitly `import org.apache.spark.SparkContext._` to enable essential implicit conversions.) + </div> <div data-lang="java" markdown="1"> @@ -821,11 +822,9 @@ by a key. In Scala, these operations are automatically available on RDDs containing [Tuple2](http://www.scala-lang.org/api/{{site.SCALA_VERSION}}/index.html#scala.Tuple2) objects -(the built-in tuples in the language, created by simply writing `(a, b)`), as long as you -import `org.apache.spark.SparkContext._` in your program to enable Spark's implicit -conversions. The key-value pair operations are available in the +(the built-in tuples in the language, created by simply writing `(a, b)`). 
The key-value pair operations are available in the [PairRDDFunctions](api/scala/index.html#org.apache.spark.rdd.PairRDDFunctions) class, -which automatically wraps around an RDD of tuples if you import the conversions. +which automatically wraps around an RDD of tuples. For example, the following code uses the `reduceByKey` operation on key-value pairs to count how many times each line of text occurs in a file: From fb90273212dc7241c9a0c3446e25e0e0b9377750 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" <joseph@databricks.com> Date: Tue, 19 May 2015 10:55:21 -0700 Subject: [PATCH 260/320] [SPARK-7047] [ML] ml.Model optional parent support Made Model.parent transient. Added Model.hasParent to test for null parent CC: mengxr Author: Joseph K. Bradley <joseph@databricks.com> Closes #5914 from jkbradley/parent-optional and squashes the following commits: d501774 [Joseph K. Bradley] Made Model.parent transient. Added Model.hasParent to test for null parent --- mllib/src/main/scala/org/apache/spark/ml/Model.scala | 5 ++++- .../spark/ml/classification/LogisticRegressionSuite.scala | 1 + .../ml/classification/RandomForestClassifierSuite.scala | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/Model.scala b/mllib/src/main/scala/org/apache/spark/ml/Model.scala index 7fd515369b19b..70e7495ac616c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Model.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Model.scala @@ -32,7 +32,7 @@ abstract class Model[M <: Model[M]] extends Transformer { * The parent estimator that produced this model. * Note: For ensembles' component Models, this value can be null. */ - var parent: Estimator[M] = _ + @transient var parent: Estimator[M] = _ /** * Sets the parent of this model (Java API). @@ -42,6 +42,9 @@ abstract class Model[M <: Model[M]] extends Transformer { this.asInstanceOf[M] } + /** Indicates whether this [[Model]] has a corresponding parent. */ + def hasParent: Boolean = parent != null + override def copy(extra: ParamMap): M = { // The default implementation of Params.copy doesn't work for models. 
throw new NotImplementedError(s"${this.getClass} doesn't implement copy(extra: ParamMap)") diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 43765241a20b6..97f9749cb4a9a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -83,6 +83,7 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { assert(model.getRawPredictionCol === "rawPrediction") assert(model.getProbabilityCol === "probability") assert(model.intercept !== 0.0) + assert(model.hasParent) } test("logistic regression doesn't fit intercept when fitIntercept is off") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala index 08f86fa45bc1d..cdbbacab8e0e3 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala @@ -162,5 +162,7 @@ private object RandomForestClassifierSuite { val oldModelAsNew = RandomForestClassificationModel.fromOld( oldModel, newModel.parent.asInstanceOf[RandomForestClassifier], categoricalFeatures) TreeTests.checkEqual(oldModelAsNew, newModel) + assert(newModel.hasParent) + assert(!newModel.trees.head.asInstanceOf[DecisionTreeClassificationModel].hasParent) } } From 7b16e9f2118fbfbb1c0ba957161fe500c9aff82a Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" <joseph@databricks.com> Date: Tue, 19 May 2015 10:57:47 -0700 Subject: [PATCH 261/320] [SPARK-7678] [ML] Fix default random seed in HasSeed Changed shared param HasSeed to have default based on hashCode of class name, instead of random number. Also, removed fixed random seeds from Word2Vec and ALS. CC: mengxr Author: Joseph K. Bradley <joseph@databricks.com> Closes #6251 from jkbradley/scala-fixed-seed and squashes the following commits: 0e37184 [Joseph K. Bradley] Fixed Word2VecSuite, ALSSuite in spark.ml to use original fixed random seeds 678ec3a [Joseph K. Bradley] Removed fixed random seeds from Word2Vec and ALS. Changed shared param HasSeed to have default based on hashCode of class name, instead of random number. --- .../org/apache/spark/ml/feature/Word2Vec.scala | 1 - .../ml/param/shared/SharedParamsCodeGen.scala | 2 +- .../spark/ml/param/shared/sharedParams.scala | 4 ++-- .../org/apache/spark/ml/recommendation/ALS.scala | 2 +- .../apache/spark/ml/feature/Word2VecSuite.scala | 1 + .../spark/ml/recommendation/ALSSuite.scala | 16 +++++++++------- 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 8ace8c53bb663..90f0be76df44f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -68,7 +68,6 @@ private[feature] trait Word2VecBase extends Params setDefault(stepSize -> 0.025) setDefault(maxIter -> 1) - setDefault(seed -> 42L) /** * Validate and transform the input schema. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 5085b798daa17..8b8cb81373a65 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -53,7 +53,7 @@ private[shared] object SharedParamsCodeGen { ParamDesc[Int]("checkpointInterval", "checkpoint interval (>= 1)", isValid = "ParamValidators.gtEq(1)"), ParamDesc[Boolean]("fitIntercept", "whether to fit an intercept term", Some("true")), - ParamDesc[Long]("seed", "random seed", Some("Utils.random.nextLong()")), + ParamDesc[Long]("seed", "random seed", Some("this.getClass.getName.hashCode.toLong")), ParamDesc[Double]("elasticNetParam", "the ElasticNet mixing parameter, in range [0, 1]." + " For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.", isValid = "ParamValidators.inRange(0, 1)"), diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 7525d37007377..3a4976d3ddcd1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -232,7 +232,7 @@ private[ml] trait HasFitIntercept extends Params { } /** - * (private[ml]) Trait for shared param seed (default: Utils.random.nextLong()). + * (private[ml]) Trait for shared param seed (default: this.getClass.getName.hashCode.toLong). */ private[ml] trait HasSeed extends Params { @@ -242,7 +242,7 @@ private[ml] trait HasSeed extends Params { */ final val seed: LongParam = new LongParam(this, "seed", "random seed") - setDefault(seed, Utils.random.nextLong()) + setDefault(seed, this.getClass.getName.hashCode.toLong) /** @group getParam */ final def getSeed: Long = $(seed) diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 45c57b50da70f..2a5ddbfae5cdf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -148,7 +148,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR setDefault(rank -> 10, maxIter -> 10, regParam -> 0.1, numUserBlocks -> 10, numItemBlocks -> 10, implicitPrefs -> false, alpha -> 1.0, userCol -> "user", itemCol -> "item", - ratingCol -> "rating", nonnegative -> false, checkpointInterval -> 10, seed -> 0L) + ratingCol -> "rating", nonnegative -> false, checkpointInterval -> 10) /** * Validates and transforms the input schema. 
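The practical effect of the new `HasSeed` default above is that an estimator's seed is now stable across runs (it is derived from the class name) rather than freshly random, while still differing between estimator classes. A rough standalone sketch of the idea, outside Spark (the trait and class names here are illustrative, not Spark's actual code); tests that need a particular seed still set one explicitly, as the updated suites below do:

```scala
trait HasSeed {
  // Deterministic for a given concrete class, unlike Utils.random.nextLong():
  // every run of MyEstimator sees the same default seed.
  def defaultSeed: Long = this.getClass.getName.hashCode.toLong
}

class MyEstimator extends HasSeed

object SeedDemo extends App {
  println(new MyEstimator().defaultSeed)  // same value on every run
}
```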
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala index 03ba86670d453..43a09cc418703 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala @@ -52,6 +52,7 @@ class Word2VecSuite extends FunSuite with MLlibTestSparkContext { .setVectorSize(3) .setInputCol("text") .setOutputCol("result") + .setSeed(42L) .fit(docDF) model.transform(docDF).select("result", "expected").collect().foreach { diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index fc7349330cf86..6cc6ec94eb643 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -345,6 +345,7 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { .setImplicitPrefs(implicitPrefs) .setNumUserBlocks(numUserBlocks) .setNumItemBlocks(numItemBlocks) + .setSeed(0) val alpha = als.getAlpha val model = als.fit(training.toDF()) val predictions = model.transform(test.toDF()) @@ -425,17 +426,18 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01) val longRatings = ratings.map(r => Rating(r.user.toLong, r.item.toLong, r.rating)) - val (longUserFactors, _) = ALS.train(longRatings, rank = 2, maxIter = 4) + val (longUserFactors, _) = ALS.train(longRatings, rank = 2, maxIter = 4, seed = 0) assert(longUserFactors.first()._1.getClass === classOf[Long]) val strRatings = ratings.map(r => Rating(r.user.toString, r.item.toString, r.rating)) - val (strUserFactors, _) = ALS.train(strRatings, rank = 2, maxIter = 4) + val (strUserFactors, _) = ALS.train(strRatings, rank = 2, maxIter = 4, seed = 0) assert(strUserFactors.first()._1.getClass === classOf[String]) } test("nonnegative constraint") { val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01) - val (userFactors, itemFactors) = ALS.train(ratings, rank = 2, maxIter = 4, nonnegative = true) + val (userFactors, itemFactors) = + ALS.train(ratings, rank = 2, maxIter = 4, nonnegative = true, seed = 0) def isNonnegative(factors: RDD[(Int, Array[Float])]): Boolean = { factors.values.map { _.forall(_ >= 0.0) }.reduce(_ && _) } @@ -459,7 +461,7 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { test("partitioner in returned factors") { val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01) val (userFactors, itemFactors) = ALS.train( - ratings, rank = 2, maxIter = 4, numUserBlocks = 3, numItemBlocks = 4) + ratings, rank = 2, maxIter = 4, numUserBlocks = 3, numItemBlocks = 4, seed = 0) for ((tpe, factors) <- Seq(("User", userFactors), ("Item", itemFactors))) { assert(userFactors.partitioner.isDefined, s"$tpe factors should have partitioner.") val part = userFactors.partitioner.get @@ -476,8 +478,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { test("als with large number of iterations") { val (ratings, _) = genExplicitTestData(numUsers = 4, numItems = 4, rank = 1) - ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2) - ALS.train( - ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, implicitPrefs 
= true) + ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, seed = 0) + ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, + implicitPrefs = true, seed = 0) } } From 3c4c1f96474b3e66fa1d44ac0177f548cf5a3a10 Mon Sep 17 00:00:00 2001 From: Iulian Dragos <jaguarul@gmail.com> Date: Tue, 19 May 2015 12:14:48 -0700 Subject: [PATCH 262/320] [SPARK-7726] Fix Scaladoc false errors Visibility rules for static members are different in Scala and Java, and this case requires an explicit static import. Even though these are Java files, they are run through scaladoc, which enforces Scala rules. Also reverted the commit that reverts the upgrade to 2.11.6 Author: Iulian Dragos <jaguarul@gmail.com> Closes #6260 from dragos/issue/scaladoc-false-error and squashes the following commits: f2e998e [Iulian Dragos] Revert "[HOTFIX] Revert "[SPARK-7092] Update spark scala version to 2.11.6"" 0bad052 [Iulian Dragos] Fix scaladoc faux-error. --- .../org/apache/spark/network/shuffle/protocol/OpenBlocks.java | 3 +++ .../spark/network/shuffle/protocol/RegisterExecutor.java | 3 +++ .../apache/spark/network/shuffle/protocol/StreamHandle.java | 3 +++ .../apache/spark/network/shuffle/protocol/UploadBlock.java | 3 +++ pom.xml | 4 ++-- .../src/main/scala/org/apache/spark/repl/SparkIMain.scala | 2 +- 6 files changed, 15 insertions(+), 3 deletions(-) diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java index 60485bace643c..ce954b8a289e4 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java @@ -24,6 +24,9 @@ import org.apache.spark.network.protocol.Encoders; +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + /** Request to read a set of blocks. Returns {@link StreamHandle}. */ public class OpenBlocks extends BlockTransferMessage { public final String appId; diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java index 38acae3b31d64..cca8b17c4f129 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java @@ -22,6 +22,9 @@ import org.apache.spark.network.protocol.Encoders; +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + /** * Initial registration message between an executor and its local shuffle server. * Returns nothing (empty bye array). diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java index 9a9220211a50c..1915295aa6cc2 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java @@ -20,6 +20,9 @@ import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; +// Needed by ScalaDoc. 
See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + /** * Identifier for a fixed number of chunks to read from a stream created by an "open blocks" * message. This is used by {@link org.apache.spark.network.shuffle.OneForOneBlockFetcher}. diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java index 2ff9aaa650f92..3caed59d508fd 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java @@ -24,6 +24,9 @@ import org.apache.spark.network.protocol.Encoders; +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + /** Request to upload a block with a certain StorageLevel. Returns nothing (empty byte array). */ public class UploadBlock extends BlockTransferMessage { diff --git a/pom.xml b/pom.xml index d903f02c1aed0..c72d7cbf843ef 100644 --- a/pom.xml +++ b/pom.xml @@ -1799,9 +1799,9 @@ <property><name>scala-2.11</name></property> </activation> <properties> - <scala.version>2.11.2</scala.version> + <scala.version>2.11.6</scala.version> <scala.binary.version>2.11</scala.binary.version> - <jline.version>2.12</jline.version> + <jline.version>2.12.1</jline.version> <jline.groupid>jline</jline.groupid> </properties> </profile> diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkIMain.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkIMain.scala index 1bb62c84abddc..1cb910f376060 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkIMain.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkIMain.scala @@ -1129,7 +1129,7 @@ class SparkIMain(@BeanProperty val factory: ScriptEngineFactory, initialSettings def apply(line: String): Result = debugging(s"""parse("$line")""") { var isIncomplete = false - currentRun.reporting.withIncompleteHandler((_, _) => isIncomplete = true) { + currentRun.parsing.withIncompleteHandler((_, _) => isIncomplete = true) { reporter.reset() val trees = newUnitParser(line).parseStats() if (reporter.hasErrors) Error From 68fb2a46edc95f867d4b28597d20da2597f008c1 Mon Sep 17 00:00:00 2001 From: Xusen Yin <yinxusen@gmail.com> Date: Tue, 19 May 2015 13:43:48 -0700 Subject: [PATCH 263/320] [SPARK-7586] [ML] [DOC] Add docs of Word2Vec in ml package CC jkbradley. JIRA [issue](https://issues.apache.org/jira/browse/SPARK-7586). 
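A note on the visibility rule behind the SPARK-7726 fix above: in Java, a subclass can refer to an inherited static nested type such as `BlockTransferMessage.Type` by its simple name, but ScalaDoc applies Scala's rules, under which static/companion members of a parent are not brought into scope that way, hence the explicit static imports. A pure-Scala analogue of the same rule (the `Base`/`Sub` names are made up for illustration):

```scala
class Base
object Base { case class Type(id: Int) }

class Sub extends Base {
  // val t = Type(0)       // does not compile: companion-object members are not inherited
  val t = Base.Type(0)     // must be qualified, or brought in via `import Base.Type`
}
```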
Author: Xusen Yin <yinxusen@gmail.com> Closes #6181 from yinxusen/SPARK-7586 and squashes the following commits: 77014c5 [Xusen Yin] comment fix 57a4c07 [Xusen Yin] small fix for docs 1178c8f [Xusen Yin] remove the correctness check in java suite 1c3f389 [Xusen Yin] delete sbt commit 1af152b [Xusen Yin] check python example code 1b5369e [Xusen Yin] add docs of word2vec --- docs/ml-features.md | 89 +++++++++++++++++++ .../spark/ml/feature/JavaWord2VecSuite.java | 76 ++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java diff --git a/docs/ml-features.md b/docs/ml-features.md index e86f9edc4f68b..63ea3e5db7ac9 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -106,6 +106,95 @@ for features_label in featurized.select("features", "label").take(3): </div> </div> +## Word2Vec + +`Word2Vec` is an `Estimator` which takes sequences of words that represents documents and trains a `Word2VecModel`. The model is a `Map(String, Vector)` essentially, which maps each word to an unique fix-sized vector. The `Word2VecModel` transforms each documents into a vector using the average of all words in the document, which aims to other computations of documents such as similarity calculation consequencely. Please refer to the [MLlib user guide on Word2Vec](mllib-feature-extraction.html#Word2Vec) for more details on Word2Vec. + +Word2Vec is implemented in [Word2Vec](api/scala/index.html#org.apache.spark.ml.feature.Word2Vec). In the following code segment, we start with a set of documents, each of them is represented as a sequence of words. For each document, we transform it into a feature vector. This feature vector could then be passed to a learning algorithm. + +<div class="codetabs"> +<div data-lang="scala" markdown="1"> +{% highlight scala %} +import org.apache.spark.ml.feature.Word2Vec + +// Input data: Each row is a bag of words from a sentence or document. +val documentDF = sqlContext.createDataFrame(Seq( + "Hi I heard about Spark".split(" "), + "I wish Java could use case classes".split(" "), + "Logistic regression models are neat".split(" ") +).map(Tuple1.apply)).toDF("text") + +// Learn a mapping from words to Vectors. +val word2Vec = new Word2Vec() + .setInputCol("text") + .setOutputCol("result") + .setVectorSize(3) + .setMinCount(0) +val model = word2Vec.fit(documentDF) +val result = model.transform(documentDF) +result.select("result").take(3).foreach(println) +{% endhighlight %} +</div> + +<div data-lang="java" markdown="1"> +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.*; + +JavaSparkContext jsc = ... +SQLContext sqlContext = ... + +// Input data: Each row is a bag of words from a sentence or document. 
+JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))), + RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))), + RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" "))) +)); +StructType schema = new StructType(new StructField[]{ + new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) +}); +DataFrame documentDF = sqlContext.createDataFrame(jrdd, schema); + +// Learn a mapping from words to Vectors. +Word2Vec word2Vec = new Word2Vec() + .setInputCol("text") + .setOutputCol("result") + .setVectorSize(3) + .setMinCount(0); +Word2VecModel model = word2Vec.fit(documentDF); +DataFrame result = model.transform(documentDF); +for (Row r: result.select("result").take(3)) { + System.out.println(r); +} +{% endhighlight %} +</div> + +<div data-lang="python" markdown="1"> +{% highlight python %} +from pyspark.ml.feature import Word2Vec + +# Input data: Each row is a bag of words from a sentence or document. +documentDF = sqlContext.createDataFrame([ + ("Hi I heard about Spark".split(" "), ), + ("I wish Java could use case classes".split(" "), ), + ("Logistic regression models are neat".split(" "), ) +], ["text"]) +# Learn a mapping from words to Vectors. +word2Vec = Word2Vec(vectorSize=3, minCount=0, inputCol="text", outputCol="result") +model = word2Vec.fit(documentDF) +result = model.transform(documentDF) +for feature in result.select("result").take(3): + print(feature) +{% endhighlight %} +</div> +</div> # Feature Transformers diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java new file mode 100644 index 0000000000000..39c70157f83c0 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.feature; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.*; + +public class JavaWord2VecSuite { + private transient JavaSparkContext jsc; + private transient SQLContext sqlContext; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaWord2VecSuite"); + sqlContext = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void testJavaWord2Vec() { + JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))), + RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))), + RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" "))) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) + }); + DataFrame documentDF = sqlContext.createDataFrame(jrdd, schema); + + Word2Vec word2Vec = new Word2Vec() + .setInputCol("text") + .setOutputCol("result") + .setVectorSize(3) + .setMinCount(0); + Word2VecModel model = word2Vec.fit(documentDF); + DataFrame result = model.transform(documentDF); + + for (Row r: result.select("result").collect()) { + double[] polyFeatures = ((Vector)r.get(0)).toArray(); + Assert.assertEquals(polyFeatures.length, 3); + } + } +} From c12dff9b82e4869f866a9b96ce0bf05503dd7dda Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Tue, 19 May 2015 13:53:08 -0700 Subject: [PATCH 264/320] [SPARK-7652] [MLLIB] Update the implementation of naive Bayes prediction with BLAS JIRA: https://issues.apache.org/jira/browse/SPARK-7652 Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #6189 from viirya/naive_bayes_blas_prediction and squashes the following commits: ab611fd [Liang-Chi Hsieh] Remove unnecessary space. ddc48b9 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into naive_bayes_blas_prediction b5772b4 [Liang-Chi Hsieh] Fix binary compatibility. 2f65186 [Liang-Chi Hsieh] Remove toDense. 1b6cdfe [Liang-Chi Hsieh] Update the implementation of naive Bayes prediction with BLAS. 
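For context on the Bernoulli case that the diff below rewrites with BLAS calls: with binary features x_i in {0, 1}, theta_ci the log conditional probability of feature i under class c, and pi_c the log class prior, the class score can be rearranged into a single matrix-vector product plus two vector additions (a sketch of the algebra, not the exact code):

$$
\log p(c \mid \mathbf{x}) \;\propto\; \pi_c + \sum_i \Big[ x_i\,\theta_{ci} + (1 - x_i)\log\big(1 - e^{\theta_{ci}}\big) \Big]
\;=\; \pi_c + \big(\boldsymbol{\theta}_c - \boldsymbol{\theta}^{-}_c\big)^{\top}\mathbf{x} + \mathbf{1}^{\top}\boldsymbol{\theta}^{-}_c,
\qquad \theta^{-}_{ci} := \log\big(1 - e^{\theta_{ci}}\big),
$$

which is why the model precomputes `thetaMinusNegTheta` and `negThetaSum` and then applies one `multiply` and two `axpy` calls at prediction time.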
--- .../mllib/classification/NaiveBayes.scala | 41 +++++++++++-------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index ac0ebeceaa1df..53fb2cba03cbf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -21,13 +21,11 @@ import java.lang.{Iterable => JIterable} import scala.collection.JavaConverters._ -import breeze.linalg.{Axis, DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum} -import breeze.numerics.{exp => brzExp, log => brzLog} import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{Logging, SparkContext, SparkException} -import org.apache.spark.mllib.linalg.{BLAS, DenseVector, SparseVector, Vector} +import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd.RDD @@ -50,6 +48,9 @@ class NaiveBayesModel private[mllib] ( val modelType: String) extends ClassificationModel with Serializable with Saveable { + private val piVector = new DenseVector(pi) + private val thetaMatrix = new DenseMatrix(labels.size, theta(0).size, theta.flatten, true) + private[mllib] def this(labels: Array[Double], pi: Array[Double], theta: Array[Array[Double]]) = this(labels, pi, theta, "Multinomial") @@ -60,17 +61,18 @@ class NaiveBayesModel private[mllib] ( theta: JIterable[JIterable[Double]]) = this(labels.asScala.toArray, pi.asScala.toArray, theta.asScala.toArray.map(_.asScala.toArray)) - private val brzPi = new BDV[Double](pi) - private val brzTheta = new BDM(theta(0).length, theta.length, theta.flatten).t - // Bernoulli scoring requires log(condprob) if 1, log(1-condprob) if 0. - // This precomputes log(1.0 - exp(theta)) and its sum which are used for the linear algebra + // This precomputes log(1.0 - exp(theta)) and its sum which are used for the linear algebra // application of this condition (in predict function). - private val (brzNegTheta, brzNegThetaSum) = modelType match { + private val (thetaMinusNegTheta, negThetaSum) = modelType match { case "Multinomial" => (None, None) case "Bernoulli" => - val negTheta = brzLog((brzExp(brzTheta.copy) :*= (-1.0)) :+= 1.0) // log(1.0 - exp(x)) - (Option(negTheta), Option(brzSum(negTheta, Axis._1))) + val negTheta = thetaMatrix.map(value => math.log(1.0 - math.exp(value))) + val ones = new DenseVector(Array.fill(thetaMatrix.numCols){1.0}) + val thetaMinusNegTheta = thetaMatrix.map { value => + value - math.log(1.0 - math.exp(value)) + } + (Option(thetaMinusNegTheta), Option(negTheta.multiply(ones))) case _ => // This should never happen. 
throw new UnknownError(s"NaiveBayesModel was created with an unknown ModelType: $modelType") @@ -85,17 +87,22 @@ class NaiveBayesModel private[mllib] ( } override def predict(testData: Vector): Double = { - val brzData = testData.toBreeze modelType match { case "Multinomial" => - labels(brzArgmax(brzPi + brzTheta * brzData)) + val prob = thetaMatrix.multiply(testData) + BLAS.axpy(1.0, piVector, prob) + labels(prob.argmax) case "Bernoulli" => - if (!brzData.forall(v => v == 0.0 || v == 1.0)) { - throw new SparkException( - s"Bernoulli Naive Bayes requires 0 or 1 feature values but found $testData.") + testData.foreachActive { (index, value) => + if (value != 0.0 && value != 1.0) { + throw new SparkException( + s"Bernoulli Naive Bayes requires 0 or 1 feature values but found $testData.") + } } - labels(brzArgmax(brzPi + - (brzTheta - brzNegTheta.get) * brzData + brzNegThetaSum.get)) + val prob = thetaMinusNegTheta.get.multiply(testData) + BLAS.axpy(1.0, piVector, prob) + BLAS.axpy(1.0, negThetaSum.get, prob) + labels(prob.argmax) case _ => // This should never happen. throw new UnknownError(s"NaiveBayesModel was created with an unknown ModelType: $modelType") From 4de74d2602f6577c3c8458aa85377e89c19724ca Mon Sep 17 00:00:00 2001 From: Davies Liu <davies@databricks.com> Date: Tue, 19 May 2015 14:23:28 -0700 Subject: [PATCH 265/320] [SPARK-7738] [SQL] [PySpark] add reader and writer API in Python cc rxin, please take a quick look, I'm working on tests. Author: Davies Liu <davies@databricks.com> Closes #6238 from davies/readwrite and squashes the following commits: c7200eb [Davies Liu] update tests 9cbf01b [Davies Liu] Merge branch 'master' of github.com:apache/spark into readwrite f0c5a04 [Davies Liu] use sqlContext.read.load 5f68bc8 [Davies Liu] update tests 6437e9a [Davies Liu] Merge branch 'master' of github.com:apache/spark into readwrite bcc6668 [Davies Liu] add reader amd writer API in Python --- .../apache/spark/api/python/PythonUtils.scala | 11 +- python/pyspark/sql/__init__.py | 1 + python/pyspark/sql/context.py | 28 +- python/pyspark/sql/dataframe.py | 67 ++-- python/pyspark/sql/readwriter.py | 338 ++++++++++++++++++ python/pyspark/sql/tests.py | 77 ++-- 6 files changed, 430 insertions(+), 92 deletions(-) create mode 100644 python/pyspark/sql/readwriter.py diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index efb6b93cfc35d..90dacaeb93429 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -50,8 +50,15 @@ private[spark] object PythonUtils { /** * Convert list of T into seq of T (for calling API with varargs) */ - def toSeq[T](cols: JList[T]): Seq[T] = { - cols.toList.toSeq + def toSeq[T](vs: JList[T]): Seq[T] = { + vs.toList.toSeq + } + + /** + * Convert list of T into array of T (for calling API with array) + */ + def toArray[T](vs: JList[T]): Array[T] = { + vs.toArray().asInstanceOf[Array[T]] } /** diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py index 19805e291e91b..634c575ecd80e 100644 --- a/python/pyspark/sql/__init__.py +++ b/python/pyspark/sql/__init__.py @@ -58,6 +58,7 @@ from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame, SchemaRDD, DataFrameNaFunctions, DataFrameStatFunctions from pyspark.sql.group import GroupedData +from pyspark.sql.readwriter import DataFrameReader, DataFrameWriter __all__ = [ 'SQLContext', 
'HiveContext', 'DataFrame', 'GroupedData', 'Column', 'Row', diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 9f26d13235d5f..7543475014bd2 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -31,6 +31,7 @@ from pyspark.sql.types import Row, StringType, StructType, _verify_type, \ _infer_schema, _has_nulltype, _merge_type, _create_converter, _python_to_sql_converter from pyspark.sql.dataframe import DataFrame +from pyspark.sql.readwriter import DataFrameReader try: import pandas @@ -457,19 +458,7 @@ def load(self, path=None, source=None, schema=None, **options): Optionally, a schema can be provided as the schema of the returned DataFrame. """ - if path is not None: - options["path"] = path - if source is None: - source = self.getConf("spark.sql.sources.default", - "org.apache.spark.sql.parquet") - if schema is None: - df = self._ssql_ctx.load(source, options) - else: - if not isinstance(schema, StructType): - raise TypeError("schema should be StructType") - scala_datatype = self._ssql_ctx.parseDataType(schema.json()) - df = self._ssql_ctx.load(source, scala_datatype, options) - return DataFrame(df, self) + return self.read.load(path, source, schema, **options) def createExternalTable(self, tableName, path=None, source=None, schema=None, **options): @@ -567,6 +556,19 @@ def clearCache(self): """Removes all cached tables from the in-memory cache. """ self._ssql_ctx.clearCache() + @property + def read(self): + """ + Returns a :class:`DataFrameReader` that can be used to read data + in as a :class:`DataFrame`. + + ::note: Experimental + + >>> sqlContext.read + <pyspark.sql.readwriter.DataFrameReader object at ...> + """ + return DataFrameReader(self) + class HiveContext(SQLContext): """A variant of Spark SQL that integrates with data stored in Hive. diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index e4a191a9ef07f..f2280b5100e53 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -29,9 +29,10 @@ from pyspark.serializers import BatchedSerializer, PickleSerializer, UTF8Deserializer from pyspark.storagelevel import StorageLevel from pyspark.traceback_utils import SCCallSiteSync -from pyspark.sql.types import * from pyspark.sql.types import _create_cls, _parse_datatype_json_string from pyspark.sql.column import Column, _to_seq, _to_java_column +from pyspark.sql.readwriter import DataFrameWriter +from pyspark.sql.types import * __all__ = ["DataFrame", "SchemaRDD", "DataFrameNaFunctions", "DataFrameStatFunctions"] @@ -151,25 +152,6 @@ def insertInto(self, tableName, overwrite=False): """ self._jdf.insertInto(tableName, overwrite) - def _java_save_mode(self, mode): - """Returns the Java save mode based on the Python save mode represented by a string. - """ - jSaveMode = self._sc._jvm.org.apache.spark.sql.SaveMode - jmode = jSaveMode.ErrorIfExists - mode = mode.lower() - if mode == "append": - jmode = jSaveMode.Append - elif mode == "overwrite": - jmode = jSaveMode.Overwrite - elif mode == "ignore": - jmode = jSaveMode.Ignore - elif mode == "error": - pass - else: - raise ValueError( - "Only 'append', 'overwrite', 'ignore', and 'error' are acceptable save mode.") - return jmode - def saveAsTable(self, tableName, source=None, mode="error", **options): """Saves the contents of this :class:`DataFrame` to a data source as a table. @@ -185,11 +167,7 @@ def saveAsTable(self, tableName, source=None, mode="error", **options): * `error`: Throw an exception if data already exists. 
* `ignore`: Silently ignore this operation if data already exists. """ - if source is None: - source = self.sql_ctx.getConf("spark.sql.sources.default", - "org.apache.spark.sql.parquet") - jmode = self._java_save_mode(mode) - self._jdf.saveAsTable(tableName, source, jmode, options) + self.write.saveAsTable(tableName, source, mode, **options) def save(self, path=None, source=None, mode="error", **options): """Saves the contents of the :class:`DataFrame` to a data source. @@ -206,13 +184,22 @@ def save(self, path=None, source=None, mode="error", **options): * `error`: Throw an exception if data already exists. * `ignore`: Silently ignore this operation if data already exists. """ - if path is not None: - options["path"] = path - if source is None: - source = self.sql_ctx.getConf("spark.sql.sources.default", - "org.apache.spark.sql.parquet") - jmode = self._java_save_mode(mode) - self._jdf.save(source, jmode, options) + return self.write.save(path, source, mode, **options) + + @property + def write(self): + """ + Interface for saving the content of the :class:`DataFrame` out + into external storage. + + :return :class:`DataFrameWriter` + + ::note: Experimental + + >>> df.write + <pyspark.sql.readwriter.DataFrameWriter object at ...> + """ + return DataFrameWriter(self) @property def schema(self): @@ -411,9 +398,19 @@ def unpersist(self, blocking=True): self._jdf.unpersist(blocking) return self - # def coalesce(self, numPartitions, shuffle=False): - # rdd = self._jdf.coalesce(numPartitions, shuffle, None) - # return DataFrame(rdd, self.sql_ctx) + def coalesce(self, numPartitions): + """ + Returns a new :class:`DataFrame` that has exactly `numPartitions` partitions. + + Similar to coalesce defined on an :class:`RDD`, this operation results in a + narrow dependency, e.g. if you go from 1000 partitions to 100 partitions, + there will not be a shuffle, instead each of the 100 new partitions will + claim 10 of the current partitions. + + >>> df.coalesce(1).rdd.getNumPartitions() + 1 + """ + return DataFrame(self._jdf.coalesce(numPartitions), self.sql_ctx) def repartition(self, numPartitions): """Returns a new :class:`DataFrame` that has exactly ``numPartitions`` partitions. diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py new file mode 100644 index 0000000000000..e2b27fb587e73 --- /dev/null +++ b/python/pyspark/sql/readwriter.py @@ -0,0 +1,338 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from py4j.java_gateway import JavaClass + +from pyspark.sql.column import _to_seq +from pyspark.sql.types import * + +__all__ = ["DataFrameReader", "DataFrameWriter"] + + +class DataFrameReader(object): + """ + Interface used to load a :class:`DataFrame` from external storage systems + (e.g. file systems, key-value stores, etc). 
Use :func:`SQLContext.read` + to access this. + + ::Note: Experimental + """ + + def __init__(self, sqlContext): + self._jreader = sqlContext._ssql_ctx.read() + self._sqlContext = sqlContext + + def _df(self, jdf): + from pyspark.sql.dataframe import DataFrame + return DataFrame(jdf, self._sqlContext) + + def load(self, path=None, format=None, schema=None, **options): + """Loads data from a data source and returns it as a :class`DataFrame`. + + :param path: optional string for file-system backed data sources. + :param format: optional string for format of the data source. Default to 'parquet'. + :param schema: optional :class:`StructType` for the input schema. + :param options: all other string options + """ + jreader = self._jreader + if format is not None: + jreader = jreader.format(format) + if schema is not None: + if not isinstance(schema, StructType): + raise TypeError("schema should be StructType") + jschema = self._sqlContext._ssql_ctx.parseDataType(schema.json()) + jreader = jreader.schema(jschema) + for k in options: + jreader = jreader.option(k, options[k]) + if path is not None: + return self._df(jreader.load(path)) + else: + return self._df(jreader.load()) + + def json(self, path, schema=None): + """ + Loads a JSON file (one object per line) and returns the result as + a :class`DataFrame`. + + If the ``schema`` parameter is not specified, this function goes + through the input once to determine the input schema. + + :param path: string, path to the JSON dataset. + :param schema: an optional :class:`StructType` for the input schema. + + >>> import tempfile, shutil + >>> jsonFile = tempfile.mkdtemp() + >>> shutil.rmtree(jsonFile) + >>> with open(jsonFile, 'w') as f: + ... f.writelines(jsonStrings) + >>> df1 = sqlContext.read.json(jsonFile) + >>> df1.printSchema() + root + |-- field1: long (nullable = true) + |-- field2: string (nullable = true) + |-- field3: struct (nullable = true) + | |-- field4: long (nullable = true) + + >>> from pyspark.sql.types import * + >>> schema = StructType([ + ... StructField("field2", StringType()), + ... StructField("field3", + ... StructType([StructField("field5", ArrayType(IntegerType()))]))]) + >>> df2 = sqlContext.read.json(jsonFile, schema) + >>> df2.printSchema() + root + |-- field2: string (nullable = true) + |-- field3: struct (nullable = true) + | |-- field5: array (nullable = true) + | | |-- element: integer (containsNull = true) + """ + if schema is None: + jdf = self._jreader.json(path) + else: + jschema = self._sqlContext._ssql_ctx.parseDataType(schema.json()) + jdf = self._jreader.schema(jschema).json(path) + return self._df(jdf) + + def table(self, tableName): + """Returns the specified table as a :class:`DataFrame`. + + >>> sqlContext.registerDataFrameAsTable(df, "table1") + >>> df2 = sqlContext.read.table("table1") + >>> sorted(df.collect()) == sorted(df2.collect()) + True + """ + return self._df(self._jreader.table(tableName)) + + def parquet(self, *path): + """Loads a Parquet file, returning the result as a :class:`DataFrame`. 
+ + >>> import tempfile, shutil + >>> parquetFile = tempfile.mkdtemp() + >>> shutil.rmtree(parquetFile) + >>> df.saveAsParquetFile(parquetFile) + >>> df2 = sqlContext.read.parquet(parquetFile) + >>> sorted(df.collect()) == sorted(df2.collect()) + True + """ + return self._df(self._jreader.parquet(_to_seq(self._sqlContext._sc, path))) + + def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPartitions=None, + predicates=None, properties={}): + """ + Construct a :class:`DataFrame` representing the database table accessible + via JDBC URL `url` named `table` and connection `properties`. + + The `column` parameter could be used to partition the table, then it will + be retrieved in parallel based on the parameters passed to this function. + + The `predicates` parameter gives a list expressions suitable for inclusion + in WHERE clauses; each one defines one partition of the :class:`DataFrame`. + + ::Note: Don't create too many partitions in parallel on a large cluster; + otherwise Spark might crash your external database systems. + + :param url: a JDBC URL + :param table: name of table + :param column: the column used to partition + :param lowerBound: the lower bound of partition column + :param upperBound: the upper bound of the partition column + :param numPartitions: the number of partitions + :param predicates: a list of expressions + :param properties: JDBC database connection arguments, a list of arbitrary string + tag/value. Normally at least a "user" and "password" property + should be included. + :return: a DataFrame + """ + jprop = JavaClass("java.util.Properties", self._sqlContext._sc._gateway._gateway_client)() + for k in properties: + jprop.setProperty(k, properties[k]) + if column is not None: + if numPartitions is None: + numPartitions = self._sqlContext._sc.defaultParallelism + return self._df(self._jreader.jdbc(url, table, column, int(lowerBound), int(upperBound), + int(numPartitions), jprop)) + if predicates is not None: + arr = self._sqlContext._sc._jvm.PythonUtils.toArray(predicates) + return self._df(self._jreader.jdbc(url, table, arr, jprop)) + return self._df(self._jreader.jdbc(url, table, jprop)) + + +class DataFrameWriter(object): + """ + Interface used to write a [[DataFrame]] to external storage systems + (e.g. file systems, key-value stores, etc). Use :func:`DataFrame.write` + to access this. + + ::Note: Experimental + """ + def __init__(self, df): + self._df = df + self._sqlContext = df.sql_ctx + self._jwrite = df._jdf.write() + + def save(self, path=None, format=None, mode="error", **options): + """ + Saves the contents of the :class:`DataFrame` to a data source. + + The data source is specified by the ``format`` and a set of ``options``. + If ``format`` is not specified, the default data source configured by + ``spark.sql.sources.default`` will be used. + + Additionally, mode is used to specify the behavior of the save operation when + data already exists in the data source. There are four modes: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. 
+ + :param path: the path in a Hadoop supported file system + :param format: the format used to save + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + :param options: all other string options + """ + jwrite = self._jwrite.mode(mode) + if format is not None: + jwrite = jwrite.format(format) + for k in options: + jwrite = jwrite.option(k, options[k]) + if path is None: + jwrite.save() + else: + jwrite.save(path) + + def saveAsTable(self, name, format=None, mode="error", **options): + """ + Saves the contents of this :class:`DataFrame` to a data source as a table. + + The data source is specified by the ``source`` and a set of ``options``. + If ``source`` is not specified, the default data source configured by + ``spark.sql.sources.default`` will be used. + + Additionally, mode is used to specify the behavior of the saveAsTable operation when + table already exists in the data source. There are four modes: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. + + :param name: the table name + :param format: the format used to save + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + :param options: all other string options + """ + jwrite = self._jwrite.mode(mode) + if format is not None: + jwrite = jwrite.format(format) + for k in options: + jwrite = jwrite.option(k, options[k]) + return jwrite.saveAsTable(name) + + def json(self, path, mode="error"): + """ + Saves the content of the :class:`DataFrame` in JSON format at the + specified path. + + Additionally, mode is used to specify the behavior of the save operation when + data already exists in the data source. There are four modes: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. + + :param path: the path in any Hadoop supported file system + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + """ + return self._jwrite.mode(mode).json(path) + + def parquet(self, path, mode="error"): + """ + Saves the content of the :class:`DataFrame` in Parquet format at the + specified path. + + Additionally, mode is used to specify the behavior of the save operation when + data already exists in the data source. There are four modes: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. + + :param path: the path in any Hadoop supported file system + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + """ + return self._jwrite.mode(mode).parquet(path) + + def jdbc(self, url, table, mode="error", properties={}): + """ + Saves the content of the :class:`DataFrame` to a external database table + via JDBC. + + In the case the table already exists in the external database, + behavior of this function depends on the save mode, specified by the `mode` + function (default to throwing an exception). There are four modes: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. 
+ * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. + + :param url: a JDBC URL of the form `jdbc:subprotocol:subname` + :param table: Name of the table in the external database. + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + :param properties: JDBC database connection arguments, a list of + arbitrary string tag/value. Normally at least a + "user" and "password" property should be included. + """ + jprop = JavaClass("java.util.Properties", self._sqlContext._sc._gateway._gateway_client)() + for k in properties: + jprop.setProperty(k, properties[k]) + self._jwrite.mode(mode).jdbc(url, table, jprop) + + +def _test(): + import doctest + from pyspark.context import SparkContext + from pyspark.sql import Row, SQLContext + import pyspark.sql.readwriter + globs = pyspark.sql.readwriter.__dict__.copy() + sc = SparkContext('local[4]', 'PythonTest') + globs['sc'] = sc + globs['sqlContext'] = SQLContext(sc) + globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \ + .toDF(StructType([StructField('age', IntegerType()), + StructField('name', StringType())])) + jsonStrings = [ + '{"field1": 1, "field2": "row1", "field3":{"field4":11}}', + '{"field1" : 2, "field3":{"field4":22, "field5": [10, 11]},' + '"field6":[{"field7": "row2"}]}', + '{"field1" : null, "field2": "row3", ' + '"field3":{"field4":33, "field5": []}}' + ] + globs['jsonStrings'] = jsonStrings + (failure_count, test_count) = doctest.testmod( + pyspark.sql.readwriter, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + globs['sc'].stop() + if failure_count: + exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 84ae36f2fd026..7e349962416c9 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -485,29 +485,29 @@ def test_save_and_load(self): df = self.df tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) - df.save(tmpPath, "org.apache.spark.sql.json", "error") - actual = self.sqlCtx.load(tmpPath, "org.apache.spark.sql.json") - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + df.write.json(tmpPath) + actual = self.sqlCtx.read.json(tmpPath) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) schema = StructType([StructField("value", StringType(), True)]) - actual = self.sqlCtx.load(tmpPath, "org.apache.spark.sql.json", schema) - self.assertTrue(sorted(df.select("value").collect()) == sorted(actual.collect())) + actual = self.sqlCtx.read.json(tmpPath, schema) + self.assertEqual(sorted(df.select("value").collect()), sorted(actual.collect())) - df.save(tmpPath, "org.apache.spark.sql.json", "overwrite") - actual = self.sqlCtx.load(tmpPath, "org.apache.spark.sql.json") - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + df.write.json(tmpPath, "overwrite") + actual = self.sqlCtx.read.json(tmpPath) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) - df.save(source="org.apache.spark.sql.json", mode="overwrite", path=tmpPath, - noUse="this options will not be used in save.") - actual = self.sqlCtx.load(source="org.apache.spark.sql.json", path=tmpPath, - noUse="this options will not be used in load.") - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + df.write.save(format="json", mode="overwrite", path=tmpPath, + noUse="this options will not be used in save.") + actual = self.sqlCtx.read.load(format="json", 
path=tmpPath, + noUse="this options will not be used in load.") + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) defaultDataSourceName = self.sqlCtx.getConf("spark.sql.sources.default", "org.apache.spark.sql.parquet") self.sqlCtx.sql("SET spark.sql.sources.default=org.apache.spark.sql.json") actual = self.sqlCtx.load(path=tmpPath) - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) self.sqlCtx.sql("SET spark.sql.sources.default=" + defaultDataSourceName) shutil.rmtree(tmpPath) @@ -767,51 +767,44 @@ def test_save_and_load_table(self): df = self.df tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) - df.saveAsTable("savedJsonTable", "org.apache.spark.sql.json", "append", path=tmpPath) - actual = self.sqlCtx.createExternalTable("externalJsonTable", tmpPath, - "org.apache.spark.sql.json") - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + df.write.saveAsTable("savedJsonTable", "json", "append", path=tmpPath) + actual = self.sqlCtx.createExternalTable("externalJsonTable", tmpPath, "json") + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) self.sqlCtx.sql("DROP TABLE externalJsonTable") - df.saveAsTable("savedJsonTable", "org.apache.spark.sql.json", "overwrite", path=tmpPath) + df.write.saveAsTable("savedJsonTable", "json", "overwrite", path=tmpPath) schema = StructType([StructField("value", StringType(), True)]) - actual = self.sqlCtx.createExternalTable("externalJsonTable", - source="org.apache.spark.sql.json", + actual = self.sqlCtx.createExternalTable("externalJsonTable", source="json", schema=schema, path=tmpPath, noUse="this options will not be used") - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) - self.assertTrue( - sorted(df.select("value").collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) - self.assertTrue(sorted(df.select("value").collect()) == sorted(actual.collect())) + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) + self.assertEqual(sorted(df.select("value").collect()), + sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) + self.assertEqual(sorted(df.select("value").collect()), sorted(actual.collect())) self.sqlCtx.sql("DROP TABLE savedJsonTable") self.sqlCtx.sql("DROP TABLE externalJsonTable") defaultDataSourceName = self.sqlCtx.getConf("spark.sql.sources.default", "org.apache.spark.sql.parquet") self.sqlCtx.sql("SET spark.sql.sources.default=org.apache.spark.sql.json") - df.saveAsTable("savedJsonTable", path=tmpPath, mode="overwrite") + df.write.saveAsTable("savedJsonTable", path=tmpPath, mode="overwrite") actual = self.sqlCtx.createExternalTable("externalJsonTable", path=tmpPath) - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) - 
self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) self.sqlCtx.sql("DROP TABLE savedJsonTable") self.sqlCtx.sql("DROP TABLE externalJsonTable") self.sqlCtx.sql("SET spark.sql.sources.default=" + defaultDataSourceName) shutil.rmtree(tmpPath) + if __name__ == "__main__": unittest.main() From bcb1ff81468eb4afc7c03b2bca18e99cc1ccf6b8 Mon Sep 17 00:00:00 2001 From: Cheng Hao <hao.cheng@intel.com> Date: Tue, 19 May 2015 15:20:46 -0700 Subject: [PATCH 266/320] [SPARK-7662] [SQL] Resolve correct names for generator in projection ``` select explode(map(value, key)) from src; ``` Throws exception ``` org.apache.spark.sql.AnalysisException: The number of aliases supplied in the AS clause does not match the number of columns output by the UDTF expected 2 aliases but got _c0 ; at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:43) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveGenerate$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveGenerate$$makeGeneratorOutput(Analyzer.scala:605) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveGenerate$$anonfun$apply$16$$anonfun$22.apply(Analyzer.scala:562) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveGenerate$$anonfun$apply$16$$anonfun$22.apply(Analyzer.scala:548) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251) at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveGenerate$$anonfun$apply$16.applyOrElse(Analyzer.scala:548) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveGenerate$$anonfun$apply$16.applyOrElse(Analyzer.scala:538) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:222) ``` Author: Cheng Hao <hao.cheng@intel.com> Closes #6178 from chenghao-intel/explode and squashes the following commits: 916fbe9 [Cheng Hao] add more strict rules for TGF alias 5c3f2c5 [Cheng Hao] fix bug in unit test e1d93ab [Cheng Hao] Add more unit test 19db09e [Cheng Hao] resolve names for generator in projection --- .../sql/catalyst/analysis/Analyzer.scala | 15 +++++++++++ .../sql/hive/execution/HiveQuerySuite.scala | 6 ++--- .../sql/hive/execution/SQLQuerySuite.scala | 25 ++++++++++++++++++- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index dfa4215f2efe5..c239e83271615 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -561,6 +561,21 @@ class Analyzer( /** Extracts a [[Generator]] expression and any names assigned by aliases to their output. 
*/ private object AliasedGenerator { def unapply(e: Expression): Option[(Generator, Seq[String])] = e match { + case Alias(g: Generator, name) + if g.elementTypes.size > 1 && java.util.regex.Pattern.matches("_c[0-9]+", name) => { + // Assume the default name given by parser is "_c[0-9]+", + // TODO in long term, move the naming logic from Parser to Analyzer. + // In projection, Parser gave default name for TGF as does for normal UDF, + // but the TGF probably have multiple output columns/names. + // e.g. SELECT explode(map(key, value)) FROM src; + // Let's simply ignore the default given name for this case. + Some((g, Nil)) + } + case Alias(g: Generator, name) if g.elementTypes.size > 1 => + // If not given the default names, and the TGF with multiple output columns + failAnalysis( + s"""Expect multiple names given for ${g.getClass.getName}, + |but only single name '${name}' specified""".stripMargin) case Alias(g: Generator, name) => Some((g, name :: Nil)) case MultiAlias(g: Generator, names) => Some(g, names) case _ => None diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 089a57e25c08d..e7aec0b188c66 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -111,13 +111,13 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { | SELECT key FROM gen_tmp ORDER BY key ASC; """.stripMargin) - test("multiple generator in projection") { + test("multiple generators in projection") { intercept[AnalysisException] { - sql("SELECT explode(map(key, value)), key FROM src").collect() + sql("SELECT explode(array(key, key)), explode(array(key, key)) FROM src").collect() } intercept[AnalysisException] { - sql("SELECT explode(map(key, value)) as k1, k2, key FROM src").collect() + sql("SELECT explode(array(key, key)) as k1, explode(array(key, key)) FROM src").collect() } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index e60d00e63574d..fbbf6ba5947dc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -548,13 +548,36 @@ class SQLQuerySuite extends QueryTest { dropTempTable("data") } - test("resolve udtf with single alias") { + test("resolve udtf in projection #1") { val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}""")) read.json(rdd).registerTempTable("data") val df = sql("SELECT explode(a) AS val FROM data") val col = df("val") } + test("resolve udtf in projection #2") { + val rdd = sparkContext.makeRDD((1 to 2).map(i => s"""{"a":[$i, ${i + 1}]}""")) + jsonRDD(rdd).registerTempTable("data") + checkAnswer(sql("SELECT explode(map(1, 1)) FROM data LIMIT 1"), Row(1, 1) :: Nil) + checkAnswer(sql("SELECT explode(map(1, 1)) as (k1, k2) FROM data LIMIT 1"), Row(1, 1) :: Nil) + intercept[AnalysisException] { + sql("SELECT explode(map(1, 1)) as k1 FROM data LIMIT 1") + } + + intercept[AnalysisException] { + sql("SELECT explode(map(1, 1)) as (k1, k2, k3) FROM data LIMIT 1") + } + } + + // TGF with non-TGF in project is allowed in Spark SQL, but not in Hive + test("TGF with non-TGF in projection") { + val rdd = sparkContext.makeRDD( """{"a": "1", "b":"1"}""" :: 
Nil) + jsonRDD(rdd).registerTempTable("data") + checkAnswer( + sql("SELECT explode(map(a, b)) as (k1, k2), a, b FROM data"), + Row("1", "1", "1", "1") :: Nil) + } + test("logical.Project should not be resolved if it contains aggregates or generators") { // This test is used to test the fix of SPARK-5875. // The original issue was that Project's resolved will be true when it contains From 2bc5e0616d878b09daa8e31a7a1fdb7127bca079 Mon Sep 17 00:00:00 2001 From: alyaxey <oleksii.sliusarenko@grammarly.com> Date: Tue, 19 May 2015 16:45:52 -0700 Subject: [PATCH 267/320] [SPARK-6246] [EC2] fixed support for more than 100 nodes This is a small fix. But it is important for amazon users because as the ticket states, "spark-ec2 can't handle clusters with > 100 nodes" now. Author: alyaxey <oleksii.sliusarenko@grammarly.com> Closes #6267 from alyaxey/ec2_100_nodes_fix and squashes the following commits: 1e0d747 [alyaxey] [SPARK-6246] fixed support for more than 100 nodes --- ec2/spark_ec2.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index be92d5f45aa77..c6d5a1f0d0a81 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -864,7 +864,11 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state): for i in cluster_instances: i.update() - statuses = conn.get_all_instance_status(instance_ids=[i.id for i in cluster_instances]) + max_batch = 100 + statuses = [] + for j in xrange(0, len(cluster_instances), max_batch): + batch = [i.id for i in cluster_instances[j:j + max_batch]] + statuses.extend(conn.get_all_instance_status(instance_ids=batch)) if cluster_state == 'ssh-ready': if all(i.state == 'running' for i in cluster_instances) and \ From 3860520633770cc5719b2cdebe6dc3608798386d Mon Sep 17 00:00:00 2001 From: Mike Dusenberry <dusenberrymw@gmail.com> Date: Tue, 19 May 2015 17:18:08 -0700 Subject: [PATCH 268/320] [SPARK-7744] [DOCS] [MLLIB] Distributed matrix" section in MLlib "Data Types" documentation should be reordered. The documentation for BlockMatrix should come after RowMatrix, IndexedRowMatrix, and CoordinateMatrix, as BlockMatrix references the later three types, and RowMatrix is considered the "basic" distributed matrix. This will improve comprehensibility of the "Distributed matrix" section, especially for the new reader. Author: Mike Dusenberry <dusenberrymw@gmail.com> Closes #6270 from dusenberrymw/Reorder_MLlib_Data_Types_Distributed_matrix_docs and squashes the following commits: 6313bab [Mike Dusenberry] The documentation for BlockMatrix should come after RowMatrix, IndexedRowMatrix, and CoordinateMatrix, as BlockMatrix references the later three types, and RowMatrix is considered the "basic" distributed matrix. This will improve comprehensibility of the "Distributed matrix" section, especially for the new reader. --- docs/mllib-data-types.md | 128 +++++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index acec0426dc69b..d824dab1d7f7b 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -296,70 +296,6 @@ backed by an RDD of its entries. The underlying RDDs of a distributed matrix must be deterministic, because we cache the matrix size. In general the use of non-deterministic RDDs can lead to errors. 
-### BlockMatrix - -A `BlockMatrix` is a distributed matrix backed by an RDD of `MatrixBlock`s, where a `MatrixBlock` is -a tuple of `((Int, Int), Matrix)`, where the `(Int, Int)` is the index of the block, and `Matrix` is -the sub-matrix at the given index with size `rowsPerBlock` x `colsPerBlock`. -`BlockMatrix` supports methods such as `add` and `multiply` with another `BlockMatrix`. -`BlockMatrix` also has a helper function `validate` which can be used to check whether the -`BlockMatrix` is set up properly. - -<div class="codetabs"> -<div data-lang="scala" markdown="1"> - -A [`BlockMatrix`](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.BlockMatrix) can be -most easily created from an `IndexedRowMatrix` or `CoordinateMatrix` by calling `toBlockMatrix`. -`toBlockMatrix` creates blocks of size 1024 x 1024 by default. -Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`. - -{% highlight scala %} -import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry} - -val entries: RDD[MatrixEntry] = ... // an RDD of (i, j, v) matrix entries -// Create a CoordinateMatrix from an RDD[MatrixEntry]. -val coordMat: CoordinateMatrix = new CoordinateMatrix(entries) -// Transform the CoordinateMatrix to a BlockMatrix -val matA: BlockMatrix = coordMat.toBlockMatrix().cache() - -// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid. -// Nothing happens if it is valid. -matA.validate() - -// Calculate A^T A. -val ata = matA.transpose.multiply(matA) -{% endhighlight %} -</div> - -<div data-lang="java" markdown="1"> - -A [`BlockMatrix`](api/java/org/apache/spark/mllib/linalg/distributed/BlockMatrix.html) can be -most easily created from an `IndexedRowMatrix` or `CoordinateMatrix` by calling `toBlockMatrix`. -`toBlockMatrix` creates blocks of size 1024 x 1024 by default. -Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`. - -{% highlight java %} -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.mllib.linalg.distributed.BlockMatrix; -import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix; -import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix; - -JavaRDD<MatrixEntry> entries = ... // a JavaRDD of (i, j, v) Matrix Entries -// Create a CoordinateMatrix from a JavaRDD<MatrixEntry>. -CoordinateMatrix coordMat = new CoordinateMatrix(entries.rdd()); -// Transform the CoordinateMatrix to a BlockMatrix -BlockMatrix matA = coordMat.toBlockMatrix().cache(); - -// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid. -// Nothing happens if it is valid. -matA.validate(); - -// Calculate A^T A. -BlockMatrix ata = matA.transpose().multiply(matA); -{% endhighlight %} -</div> -</div> - ### RowMatrix A `RowMatrix` is a row-oriented distributed matrix without meaningful row indices, backed by an RDD @@ -530,3 +466,67 @@ IndexedRowMatrix indexedRowMatrix = mat.toIndexedRowMatrix(); {% endhighlight %} </div> </div> + +### BlockMatrix + +A `BlockMatrix` is a distributed matrix backed by an RDD of `MatrixBlock`s, where a `MatrixBlock` is +a tuple of `((Int, Int), Matrix)`, where the `(Int, Int)` is the index of the block, and `Matrix` is +the sub-matrix at the given index with size `rowsPerBlock` x `colsPerBlock`. +`BlockMatrix` supports methods such as `add` and `multiply` with another `BlockMatrix`. 
+`BlockMatrix` also has a helper function `validate` which can be used to check whether the +`BlockMatrix` is set up properly. + +<div class="codetabs"> +<div data-lang="scala" markdown="1"> + +A [`BlockMatrix`](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.BlockMatrix) can be +most easily created from an `IndexedRowMatrix` or `CoordinateMatrix` by calling `toBlockMatrix`. +`toBlockMatrix` creates blocks of size 1024 x 1024 by default. +Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`. + +{% highlight scala %} +import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry} + +val entries: RDD[MatrixEntry] = ... // an RDD of (i, j, v) matrix entries +// Create a CoordinateMatrix from an RDD[MatrixEntry]. +val coordMat: CoordinateMatrix = new CoordinateMatrix(entries) +// Transform the CoordinateMatrix to a BlockMatrix +val matA: BlockMatrix = coordMat.toBlockMatrix().cache() + +// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid. +// Nothing happens if it is valid. +matA.validate() + +// Calculate A^T A. +val ata = matA.transpose.multiply(matA) +{% endhighlight %} +</div> + +<div data-lang="java" markdown="1"> + +A [`BlockMatrix`](api/java/org/apache/spark/mllib/linalg/distributed/BlockMatrix.html) can be +most easily created from an `IndexedRowMatrix` or `CoordinateMatrix` by calling `toBlockMatrix`. +`toBlockMatrix` creates blocks of size 1024 x 1024 by default. +Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`. + +{% highlight java %} +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.mllib.linalg.distributed.BlockMatrix; +import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix; +import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix; + +JavaRDD<MatrixEntry> entries = ... // a JavaRDD of (i, j, v) Matrix Entries +// Create a CoordinateMatrix from a JavaRDD<MatrixEntry>. +CoordinateMatrix coordMat = new CoordinateMatrix(entries.rdd()); +// Transform the CoordinateMatrix to a BlockMatrix +BlockMatrix matA = coordMat.toBlockMatrix().cache(); + +// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid. +// Nothing happens if it is valid. +matA.validate(); + +// Calculate A^T A. 
+BlockMatrix ata = matA.transpose().multiply(matA); +{% endhighlight %} +</div> +</div> From 60336e3bc02a2587fdf315f9011bbe7c9d3a58c4 Mon Sep 17 00:00:00 2001 From: scwf <wangfei1@huawei.com> Date: Tue, 19 May 2015 17:36:00 -0700 Subject: [PATCH 269/320] [SPARK-7656] [SQL] use CatalystConf in FunctionRegistry follow up for #5806 Author: scwf <wangfei1@huawei.com> Closes #6164 from scwf/FunctionRegistry and squashes the following commits: 15e6697 [scwf] use catalogconf in FunctionRegistry --- .../sql/catalyst/analysis/FunctionRegistry.scala | 12 +++++++----- .../main/scala/org/apache/spark/sql/SQLContext.scala | 2 +- .../org/apache/spark/sql/hive/HiveContext.scala | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 16ca5bcd57a72..0849faa9bfa7b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.analysis +import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.catalyst.expressions.Expression import scala.collection.mutable @@ -28,12 +29,12 @@ trait FunctionRegistry { def lookupFunction(name: String, children: Seq[Expression]): Expression - def caseSensitive: Boolean + def conf: CatalystConf } trait OverrideFunctionRegistry extends FunctionRegistry { - val functionBuilders = StringKeyHashMap[FunctionBuilder](caseSensitive) + val functionBuilders = StringKeyHashMap[FunctionBuilder](conf.caseSensitiveAnalysis) override def registerFunction(name: String, builder: FunctionBuilder): Unit = { functionBuilders.put(name, builder) @@ -44,8 +45,9 @@ trait OverrideFunctionRegistry extends FunctionRegistry { } } -class SimpleFunctionRegistry(val caseSensitive: Boolean) extends FunctionRegistry { - val functionBuilders = StringKeyHashMap[FunctionBuilder](caseSensitive) +class SimpleFunctionRegistry(val conf: CatalystConf) extends FunctionRegistry { + + val functionBuilders = StringKeyHashMap[FunctionBuilder](conf.caseSensitiveAnalysis) override def registerFunction(name: String, builder: FunctionBuilder): Unit = { functionBuilders.put(name, builder) @@ -69,7 +71,7 @@ object EmptyFunctionRegistry extends FunctionRegistry { throw new UnsupportedOperationException } - override def caseSensitive: Boolean = throw new UnsupportedOperationException + override def conf: CatalystConf = throw new UnsupportedOperationException } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 316ef7d58809d..304e958192bb9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -121,7 +121,7 @@ class SQLContext(@transient val sparkContext: SparkContext) // TODO how to handle the temp function per user session? 
@transient - protected[sql] lazy val functionRegistry: FunctionRegistry = new SimpleFunctionRegistry(true) + protected[sql] lazy val functionRegistry: FunctionRegistry = new SimpleFunctionRegistry(conf) @transient protected[sql] lazy val analyzer: Analyzer = diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 2733ebdb95bca..863a5db1bf98c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -357,7 +357,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { @transient override protected[sql] lazy val functionRegistry = new HiveFunctionRegistry with OverrideFunctionRegistry { - def caseSensitive: Boolean = false + override def conf: CatalystConf = currentSession().conf } /* An analyzer that uses the Hive metastore. */ From b3abf0b8d9bca13840eb759953d76905c2ba9b8a Mon Sep 17 00:00:00 2001 From: Xusen Yin <yinxusen@gmail.com> Date: Wed, 20 May 2015 10:41:18 +0100 Subject: [PATCH 270/320] [SPARK-7663] [MLLIB] Add requirement for word2vec model JIRA issue [link](https://issues.apache.org/jira/browse/SPARK-7663). We should check the model size of word2vec, to prevent the unexpected empty. CC srowen. Author: Xusen Yin <yinxusen@gmail.com> Closes #6228 from yinxusen/SPARK-7663 and squashes the following commits: 21770c5 [Xusen Yin] check the vocab size 54ae63e [Xusen Yin] add requirement for word2vec model --- .../main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 731f7576c2335..f65f78299d182 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -158,6 +158,9 @@ class Word2Vec extends Serializable with Logging { .sortWith((a, b) => a.cn > b.cn) vocabSize = vocab.length + require(vocabSize > 0, "The vocabulary size should be > 0. 
You may need to check " + + "the setting of minCount, which could be large enough to remove all your words in sentences.") + var a = 0 while (a < vocabSize) { vocabHash += vocab(a).word -> a From 09265ad7c85c6de6b568ec329daad632d4a79fa3 Mon Sep 17 00:00:00 2001 From: Cheng Hao <hao.cheng@intel.com> Date: Wed, 20 May 2015 19:09:47 +0800 Subject: [PATCH 271/320] [SPARK-7320] [SQL] Add Cube / Rollup for dataframe Add `cube` & `rollup` for DataFrame For example: ```scala testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")) testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")) ``` Author: Cheng Hao <hao.cheng@intel.com> Closes #6257 from chenghao-intel/rollup and squashes the following commits: 7302319 [Cheng Hao] cancel the implicit keyword a66e38f [Cheng Hao] remove the unnecessary code changes a2869d4 [Cheng Hao] update the code as comments c441777 [Cheng Hao] update the code as suggested 84c9564 [Cheng Hao] Remove the CubedData & RollupedData 279584c [Cheng Hao] hiden the CubedData & RollupedData ef357e1 [Cheng Hao] Add Cube / Rollup for dataframe --- .../org/apache/spark/sql/DataFrame.scala | 104 +++++++++++++++++- .../org/apache/spark/sql/GroupedData.scala | 92 +++++++++++----- .../hive/HiveDataFrameAnalyticsSuite.scala | 62 +++++++++++ 3 files changed, 230 insertions(+), 28 deletions(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index adad85806d1ea..d78b4c2f8909c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -685,7 +685,53 @@ class DataFrame private[sql]( * @since 1.3.0 */ @scala.annotation.varargs - def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr)) + def groupBy(cols: Column*): GroupedData = { + GroupedData(this, cols.map(_.expr), GroupedData.GroupByType) + } + + /** + * Create a multi-dimensional rollup for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * {{{ + * // Compute the average for all numeric columns rolluped by department and group. + * df.rollup($"department", $"group").avg() + * + * // Compute the max age and average salary, rolluped by department and gender. + * df.rollup($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def rollup(cols: Column*): GroupedData = { + GroupedData(this, cols.map(_.expr), GroupedData.RollupType) + } + + /** + * Create a multi-dimensional cube for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * {{{ + * // Compute the average for all numeric columns cubed by department and group. + * df.cube($"department", $"group").avg() + * + * // Compute the max age and average salary, cubed by department and gender. + * df.cube($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def cube(cols: Column*): GroupedData = GroupedData(this, cols.map(_.expr), GroupedData.CubeType) /** * Groups the [[DataFrame]] using the specified columns, so we can run aggregation on them. 
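As a quick usage sketch of the rollup/cube API this patch adds: assume a hypothetical DataFrame `df` with columns `department`, `group`, and `salary` (illustrative names only, not taken from the patch). The String-based variants shown below are the ones introduced in the next hunk; `agg(Map(...))` and `avg(...)` are the existing GroupedData aggregations.

```scala
// `df` is a hypothetical DataFrame with columns department, group and salary (illustration only).
// rollup: hierarchical subtotals over (department, group), (department), and the grand total.
df.rollup("department", "group").agg(Map("salary" -> "avg"))
// cube: aggregates over every combination of the grouping columns,
// i.e. (department, group), (department), (group), and the grand total.
df.cube("department", "group").avg("salary")
```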
@@ -710,7 +756,61 @@ class DataFrame private[sql]( @scala.annotation.varargs def groupBy(col1: String, cols: String*): GroupedData = { val colNames: Seq[String] = col1 +: cols - new GroupedData(this, colNames.map(colName => resolve(colName))) + GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.GroupByType) + } + + /** + * Create a multi-dimensional rollup for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * This is a variant of rollup that can only group by existing columns using column names + * (i.e. cannot construct expressions). + * + * {{{ + * // Compute the average for all numeric columns rolluped by department and group. + * df.rollup("department", "group").avg() + * + * // Compute the max age and average salary, rolluped by department and gender. + * df.rollup($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def rollup(col1: String, cols: String*): GroupedData = { + val colNames: Seq[String] = col1 +: cols + GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.RollupType) + } + + /** + * Create a multi-dimensional cube for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * This is a variant of cube that can only group by existing columns using column names + * (i.e. cannot construct expressions). + * + * {{{ + * // Compute the average for all numeric columns cubed by department and group. + * df.cube("department", "group").avg() + * + * // Compute the max age and average salary, cubed by department and gender. 
+ * df.cube($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def cube(col1: String, cols: String*): GroupedData = { + val colNames: Seq[String] = col1 +: cols + GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.CubeType) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 1381b9f1a6080..f730e4ae00e2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -23,9 +23,40 @@ import scala.language.implicitConversions import org.apache.spark.annotation.Experimental import org.apache.spark.sql.catalyst.analysis.Star import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.Aggregate +import org.apache.spark.sql.catalyst.plans.logical.{Rollup, Cube, Aggregate} import org.apache.spark.sql.types.NumericType +/** + * Companion object for GroupedData + */ +private[sql] object GroupedData { + def apply( + df: DataFrame, + groupingExprs: Seq[Expression], + groupType: GroupType): GroupedData = { + new GroupedData(df, groupingExprs, groupType: GroupType) + } + + /** + * The Grouping Type + */ + trait GroupType + + /** + * To indicate it's the GroupBy + */ + object GroupByType extends GroupType + + /** + * To indicate it's the CUBE + */ + object CubeType extends GroupType + + /** + * To indicate it's the ROLLUP + */ + object RollupType extends GroupType +} /** * :: Experimental :: @@ -34,19 +65,37 @@ import org.apache.spark.sql.types.NumericType * @since 1.3.0 */ @Experimental -class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) { +class GroupedData protected[sql]( + df: DataFrame, + groupingExprs: Seq[Expression], + private val groupType: GroupedData.GroupType) { - private[sql] implicit def toDF(aggExprs: Seq[NamedExpression]): DataFrame = { - val namedGroupingExprs = groupingExprs.map { - case expr: NamedExpression => expr - case expr: Expression => Alias(expr, expr.prettyString)() + private[this] def toDF(aggExprs: Seq[NamedExpression]): DataFrame = { + val aggregates = if (df.sqlContext.conf.dataFrameRetainGroupColumns) { + val retainedExprs = groupingExprs.map { + case expr: NamedExpression => expr + case expr: Expression => Alias(expr, expr.prettyString)() + } + retainedExprs ++ aggExprs + } else { + aggExprs + } + + groupType match { + case GroupedData.GroupByType => + DataFrame( + df.sqlContext, Aggregate(groupingExprs, aggregates, df.logicalPlan)) + case GroupedData.RollupType => + DataFrame( + df.sqlContext, Rollup(groupingExprs, df.logicalPlan, aggregates)) + case GroupedData.CubeType => + DataFrame( + df.sqlContext, Cube(groupingExprs, df.logicalPlan, aggregates)) } - DataFrame( - df.sqlContext, Aggregate(groupingExprs, namedGroupingExprs ++ aggExprs, df.logicalPlan)) } private[this] def aggregateNumericColumns(colNames: String*)(f: Expression => Expression) - : Seq[NamedExpression] = { + : DataFrame = { val columnExprs = if (colNames.isEmpty) { // No columns specified. Use all numeric columns. 
@@ -63,10 +112,10 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) namedExpr } } - columnExprs.map { c => + toDF(columnExprs.map { c => val a = f(c) Alias(a, a.prettyString)() - } + }) } private[this] def strToExpr(expr: String): (Expression => Expression) = { @@ -119,10 +168,10 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = { - exprs.map { case (colName, expr) => + toDF(exprs.map { case (colName, expr) => val a = strToExpr(expr)(df(colName).expr) Alias(a, a.prettyString)() - }.toSeq + }.toSeq) } /** @@ -175,19 +224,10 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = { - val aggExprs = (expr +: exprs).map(_.expr).map { + toDF((expr +: exprs).map(_.expr).map { case expr: NamedExpression => expr case expr: Expression => Alias(expr, expr.prettyString)() - } - if (df.sqlContext.conf.dataFrameRetainGroupColumns) { - val retainedExprs = groupingExprs.map { - case expr: NamedExpression => expr - case expr: Expression => Alias(expr, expr.prettyString)() - } - DataFrame(df.sqlContext, Aggregate(groupingExprs, retainedExprs ++ aggExprs, df.logicalPlan)) - } else { - DataFrame(df.sqlContext, Aggregate(groupingExprs, aggExprs, df.logicalPlan)) - } + }) } /** @@ -196,7 +236,7 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * * @since 1.3.0 */ - def count(): DataFrame = Seq(Alias(Count(Literal(1)), "count")()) + def count(): DataFrame = toDF(Seq(Alias(Count(Literal(1)), "count")())) /** * Compute the average value for each numeric columns for each group. This is an alias for `avg`. @@ -256,5 +296,5 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) @scala.annotation.varargs def sum(colNames: String*): DataFrame = { aggregateNumericColumns(colNames:_*)(Sum) - } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala new file mode 100644 index 0000000000000..3ad05f482504c --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive.implicits._ + +case class TestData2Int(a: Int, b: Int) + +// TODO ideally we should put the test suite into the package `sql`, as +// `hive` package is optional in compiling, however, `SQLContext.sql` doesn't +// support the `cube` or `rollup` yet. +class HiveDataFrameAnalyticsSuite extends QueryTest { + val testData = + TestHive.sparkContext.parallelize( + TestData2Int(1, 2) :: + TestData2Int(2, 4) :: Nil).toDF() + + testData.registerTempTable("mytable") + + test("rollup") { + checkAnswer( + testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")), + sql("select a + b, b, sum(a - b) from mytable group by a + b, b with rollup").collect() + ) + + checkAnswer( + testData.rollup("a", "b").agg(sum("b")), + sql("select a, b, sum(b) from mytable group by a, b with rollup").collect() + ) + } + + test("cube") { + checkAnswer( + testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")), + sql("select a + b, b, sum(a - b) from mytable group by a + b, b with cube").collect() + ) + + checkAnswer( + testData.cube("a", "b").agg(sum("b")), + sql("select a, b, sum(b) from mytable group by a, b with cube").collect() + ) + } +} From 3ddf051ee7256f642f8a17768d161c7b5f55c7e1 Mon Sep 17 00:00:00 2001 From: ehnalis <zoltan.zvara@gmail.com> Date: Wed, 20 May 2015 08:27:39 -0500 Subject: [PATCH 272/320] [SPARK-7533] [YARN] Decrease spacing between AM-RM heartbeats. Added faster RM-heartbeats on pending container allocations with multiplicative back-off. Also updated related documentations. Author: ehnalis <zoltan.zvara@gmail.com> Closes #6082 from ehnalis/yarn and squashes the following commits: a1d2101 [ehnalis] MIss-spell fixed. 90f8ba4 [ehnalis] Changed default HB values. 6120295 [ehnalis] Removed the bug, when allocation heartbeat would not start from initial value. 08bac63 [ehnalis] Refined style, grammar, removed duplicated code. 073d283 [ehnalis] [SPARK-7533] [YARN] Decrease spacing between AM-RM heartbeats. d4408c9 [ehnalis] [SPARK-7533] [YARN] Decrease spacing between AM-RM heartbeats. --- docs/running-on-yarn.md | 15 +++++++- .../spark/deploy/yarn/ApplicationMaster.scala | 34 ++++++++++++++----- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 51c1339165024..9d55f435e80ad 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -71,9 +71,22 @@ Most of the configs are the same for Spark on YARN as for other deployment modes </tr> <tr> <td><code>spark.yarn.scheduler.heartbeat.interval-ms</code></td> - <td>5000</td> + <td>3000</td> <td> The interval in ms in which the Spark application master heartbeats into the YARN ResourceManager. + The value is capped at half the value of YARN's configuration for the expiry interval + (<code>yarn.am.liveness-monitor.expiry-interval-ms</code>). + </td> +</tr> +<tr> + <td><code>spark.yarn.scheduler.initial-allocation.interval</code></td> + <td>200ms</td> + <td> + The initial interval in which the Spark application master eagerly heartbeats to the YARN ResourceManager + when there are pending container allocation requests. It should be no larger than + <code>spark.yarn.scheduler.heartbeat.interval-ms</code>. 
The allocation interval will doubled on + successive eager heartbeats if pending containers still exist, until + <code>spark.yarn.scheduler.heartbeat.interval-ms</code> is reached. </td> </tr> <tr> diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 29752969e6152..63a6f2e9472c1 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -300,11 +300,14 @@ private[spark] class ApplicationMaster( val expiryInterval = yarnConf.getInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 120000) // we want to be reasonably responsive without causing too many requests to RM. - val schedulerInterval = - sparkConf.getTimeAsMs("spark.yarn.scheduler.heartbeat.interval-ms", "5s") + val heartbeatInterval = math.max(0, math.min(expiryInterval / 2, + sparkConf.getTimeAsMs("spark.yarn.scheduler.heartbeat.interval-ms", "3s"))) - // must be <= expiryInterval / 2. - val interval = math.max(0, math.min(expiryInterval / 2, schedulerInterval)) + // we want to check more frequently for pending containers + val initialAllocationInterval = math.min(heartbeatInterval, + sparkConf.getTimeAsMs("spark.yarn.scheduler.initial-allocation.interval", "200ms")) + + var nextAllocationInterval = initialAllocationInterval // The number of failures in a row until Reporter thread give up val reporterMaxFailures = sparkConf.getInt("spark.yarn.scheduler.reporterThread.maxFailures", 5) @@ -330,15 +333,27 @@ private[spark] class ApplicationMaster( if (!NonFatal(e) || failureCount >= reporterMaxFailures) { finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_REPORTER_FAILURE, "Exception was thrown " + - s"${failureCount} time(s) from Reporter thread.") - + s"$failureCount time(s) from Reporter thread.") } else { - logWarning(s"Reporter thread fails ${failureCount} time(s) in a row.", e) + logWarning(s"Reporter thread fails $failureCount time(s) in a row.", e) } } } try { - Thread.sleep(interval) + val numPendingAllocate = allocator.getNumPendingAllocate + val sleepInterval = + if (numPendingAllocate > 0) { + val currentAllocationInterval = + math.min(heartbeatInterval, nextAllocationInterval) + nextAllocationInterval *= 2 + currentAllocationInterval + } else { + nextAllocationInterval = initialAllocationInterval + heartbeatInterval + } + logDebug(s"Number of pending allocations is $numPendingAllocate. 
" + + s"Sleeping for $sleepInterval.") + Thread.sleep(sleepInterval) } catch { case e: InterruptedException => } @@ -349,7 +364,8 @@ private[spark] class ApplicationMaster( t.setDaemon(true) t.setName("Reporter") t.start() - logInfo("Started progress reporter thread - sleep time : " + interval) + logInfo(s"Started progress reporter thread with (heartbeat : $heartbeatInterval, " + + s"initial allocation : $initialAllocationInterval) intervals") t } From 589b12f8e62ec5d10713ce057756ebc791e7ddc6 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Wed, 20 May 2015 07:46:17 -0700 Subject: [PATCH 273/320] [SPARK-7654] [MLLIB] Migrate MLlib to the DataFrame reader/writer API parquetFile -> read.parquet rxin Author: Xiangrui Meng <meng@databricks.com> Closes #6281 from mengxr/SPARK-7654 and squashes the following commits: a79b612 [Xiangrui Meng] parquetFile -> read.parquet --- .../org/apache/spark/mllib/classification/NaiveBayes.scala | 4 ++-- .../mllib/classification/impl/GLMClassificationModel.scala | 2 +- .../apache/spark/mllib/clustering/GaussianMixtureModel.scala | 2 +- .../scala/org/apache/spark/mllib/clustering/KMeansModel.scala | 2 +- .../main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 2 +- .../spark/mllib/recommendation/MatrixFactorizationModel.scala | 4 ++-- .../apache/spark/mllib/regression/IsotonicRegression.scala | 2 +- .../spark/mllib/regression/impl/GLMRegressionModel.scala | 2 +- .../org/apache/spark/mllib/tree/model/DecisionTreeModel.scala | 2 +- .../apache/spark/mllib/tree/model/treeEnsembleModels.scala | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 53fb2cba03cbf..cffe9ef1e0b2a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -153,7 +153,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { def load(sc: SparkContext, path: String): NaiveBayesModel = { val sqlContext = new SQLContext(sc) // Load Parquet data. - val dataRDD = sqlContext.parquetFile(dataPath(path)) + val dataRDD = sqlContext.read.parquet(dataPath(path)) // Check schema explicitly since erasure makes it hard to use match-case for checking. checkSchema[Data](dataRDD.schema) val dataArray = dataRDD.select("labels", "pi", "theta", "modelType").take(1) @@ -199,7 +199,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { def load(sc: SparkContext, path: String): NaiveBayesModel = { val sqlContext = new SQLContext(sc) // Load Parquet data. - val dataRDD = sqlContext.parquetFile(dataPath(path)) + val dataRDD = sqlContext.read.parquet(dataPath(path)) // Check schema explicitly since erasure makes it hard to use match-case for checking. 
checkSchema[Data](dataRDD.schema) val dataArray = dataRDD.select("labels", "pi", "theta").take(1) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala index d842ec57b2f52..fe09f6b75d28b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala @@ -75,7 +75,7 @@ private[classification] object GLMClassificationModel { def loadData(sc: SparkContext, path: String, modelClass: String): Data = { val datapath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val dataRDD = sqlContext.parquetFile(datapath) + val dataRDD = sqlContext.read.parquet(datapath) val dataArray = dataRDD.select("weights", "intercept", "threshold").take(1) assert(dataArray.size == 1, s"Unable to load $modelClass data from: $datapath") val data = dataArray(0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala index 731b43a1be574..86353aed81156 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala @@ -132,7 +132,7 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] { def load(sc: SparkContext, path: String): GaussianMixtureModel = { val dataPath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val dataFrame = sqlContext.parquetFile(dataPath) + val dataFrame = sqlContext.read.parquet(dataPath) val dataArray = dataFrame.select("weight", "mu", "sigma").collect() // Check schema explicitly since erasure makes it hard to use match-case for checking. 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index 252e166e85cef..8ecb3df11d95e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -120,7 +120,7 @@ object KMeansModel extends Loader[KMeansModel] { assert(className == thisClassName) assert(formatVersion == thisFormatVersion) val k = (metadata \ "k").extract[Int] - val centriods = sqlContext.parquetFile(Loader.dataPath(path)) + val centriods = sqlContext.read.parquet(Loader.dataPath(path)) Loader.checkSchema[Cluster](centriods.schema) val localCentriods = centriods.map(Cluster.apply).collect() assert(k == localCentriods.size) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index f65f78299d182..9106b73dfcd76 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -559,7 +559,7 @@ object Word2VecModel extends Loader[Word2VecModel] { def load(sc: SparkContext, path: String): Word2VecModel = { val dataPath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val dataFrame = sqlContext.parquetFile(dataPath) + val dataFrame = sqlContext.read.parquet(dataPath) val dataArray = dataFrame.select("word", "vector").collect() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index b960fbc5bf5f5..93aa41e49961e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -292,11 +292,11 @@ object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] { assert(className == thisClassName) assert(formatVersion == thisFormatVersion) val rank = (metadata \ "rank").extract[Int] - val userFeatures = sqlContext.parquetFile(userPath(path)) + val userFeatures = sqlContext.read.parquet(userPath(path)) .map { case Row(id: Int, features: Seq[_]) => (id, features.asInstanceOf[Seq[Double]].toArray) } - val productFeatures = sqlContext.parquetFile(productPath(path)) + val productFeatures = sqlContext.read.parquet(productPath(path)) .map { case Row(id: Int, features: Seq[_]) => (id, features.asInstanceOf[Seq[Double]].toArray) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 22b9b22a871f0..3ea63dd8c0acd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -189,7 +189,7 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { def load(sc: SparkContext, path: String): (Array[Double], Array[Double]) = { val sqlContext = new SQLContext(sc) - val dataRDD = sqlContext.parquetFile(dataPath(path)) + val dataRDD = sqlContext.read.parquet(dataPath(path)) checkSchema[Data](dataRDD.schema) val dataArray = dataRDD.select("boundary", "prediction").collect() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala index 2aa0e9ef96d48..317d3a5702636 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala @@ -72,7 +72,7 @@ private[regression] object GLMRegressionModel { def loadData(sc: SparkContext, path: String, modelClass: String, numFeatures: Int): Data = { val datapath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val dataRDD = sqlContext.parquetFile(datapath) + val dataRDD = sqlContext.read.parquet(datapath) val dataArray = dataRDD.select("weights", "intercept").take(1) assert(dataArray.size == 1, s"Unable to load $modelClass data from: $datapath") val data = dataArray(0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index a558f84c8d506..25bb1453db404 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -230,7 +230,7 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { val datapath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) // Load Parquet data. - val dataRDD = sqlContext.parquetFile(datapath) + val dataRDD = sqlContext.read.parquet(datapath) // Check schema explicitly since erasure makes it hard to use match-case for checking. Loader.checkSchema[NodeData](dataRDD.schema) val nodes = dataRDD.map(NodeData.apply) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index f9cd0140fe63f..1e3333d8d81d0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -437,7 +437,7 @@ private[tree] object TreeEnsembleModel extends Logging { treeAlgo: String): Array[DecisionTreeModel] = { val datapath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val nodes = sqlContext.parquetFile(datapath).map(NodeData.apply) + val nodes = sqlContext.read.parquet(datapath).map(NodeData.apply) val trees = constructTrees(nodes) trees.map(new DecisionTreeModel(_, Algo.fromString(treeAlgo))) } From 98a46f9dffec294386f6c39acafa7f11adb87a8f Mon Sep 17 00:00:00 2001 From: Yanbo Liang <ybliang8@gmail.com> Date: Wed, 20 May 2015 07:55:51 -0700 Subject: [PATCH 274/320] [SPARK-6094] [MLLIB] Add MultilabelMetrics in PySpark/MLlib Add MultilabelMetrics in PySpark/MLlib Author: Yanbo Liang <ybliang8@gmail.com> Closes #6276 from yanboliang/spark-6094 and squashes the following commits: b8e3343 [Yanbo Liang] Add MultilabelMetrics in PySpark/MLlib --- .../mllib/evaluation/MultilabelMetrics.scala | 8 ++ python/pyspark/mllib/evaluation.py | 117 ++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala index a8378a76d20ae..bf6eb1d5bd2ab 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.rdd.RDD 
import org.apache.spark.SparkContext._ +import org.apache.spark.sql.DataFrame /** * Evaluator for multilabel classification. @@ -27,6 +28,13 @@ import org.apache.spark.SparkContext._ */ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) { + /** + * An auxiliary constructor taking a DataFrame. + * @param predictionAndLabels a DataFrame with two double array columns: prediction and label + */ + private[mllib] def this(predictionAndLabels: DataFrame) = + this(predictionAndLabels.map(r => (r.getSeq[Double](0).toArray, r.getSeq[Double](1).toArray))) + private lazy val numDocs: Long = predictionAndLabels.count() private lazy val numLabels: Long = predictionAndLabels.flatMap { case (_, labels) => diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index a5e5ddc8fe506..aab5e5f4b77b5 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -343,6 +343,123 @@ def ndcgAt(self, k): return self.call("ndcgAt", int(k)) +class MultilabelMetrics(JavaModelWrapper): + """ + Evaluator for multilabel classification. + + >>> predictionAndLabels = sc.parallelize([([0.0, 1.0], [0.0, 2.0]), ([0.0, 2.0], [0.0, 1.0]), + ... ([], [0.0]), ([2.0], [2.0]), ([2.0, 0.0], [2.0, 0.0]), + ... ([0.0, 1.0, 2.0], [0.0, 1.0]), ([1.0], [1.0, 2.0])]) + >>> metrics = MultilabelMetrics(predictionAndLabels) + >>> metrics.precision(0.0) + 1.0 + >>> metrics.recall(1.0) + 0.66... + >>> metrics.f1Measure(2.0) + 0.5 + >>> metrics.precision() + 0.66... + >>> metrics.recall() + 0.64... + >>> metrics.f1Measure() + 0.63... + >>> metrics.microPrecision + 0.72... + >>> metrics.microRecall + 0.66... + >>> metrics.microF1Measure + 0.69... + >>> metrics.hammingLoss + 0.33... + >>> metrics.subsetAccuracy + 0.28... + >>> metrics.accuracy + 0.54... + """ + + def __init__(self, predictionAndLabels): + sc = predictionAndLabels.ctx + sql_ctx = SQLContext(sc) + df = sql_ctx.createDataFrame(predictionAndLabels, + schema=sql_ctx._inferSchema(predictionAndLabels)) + java_class = sc._jvm.org.apache.spark.mllib.evaluation.MultilabelMetrics + java_model = java_class(df._jdf) + super(MultilabelMetrics, self).__init__(java_model) + + def precision(self, label=None): + """ + Returns precision or precision for a given label (category) if specified. + """ + if label is None: + return self.call("precision") + else: + return self.call("precision", float(label)) + + def recall(self, label=None): + """ + Returns recall or recall for a given label (category) if specified. + """ + if label is None: + return self.call("recall") + else: + return self.call("recall", float(label)) + + def f1Measure(self, label=None): + """ + Returns f1Measure or f1Measure for a given label (category) if specified. + """ + if label is None: + return self.call("f1Measure") + else: + return self.call("f1Measure", float(label)) + + @property + def microPrecision(self): + """ + Returns micro-averaged label-based precision. + (equals to micro-averaged document-based precision) + """ + return self.call("microPrecision") + + @property + def microRecall(self): + """ + Returns micro-averaged label-based recall. + (equals to micro-averaged document-based recall) + """ + return self.call("microRecall") + + @property + def microF1Measure(self): + """ + Returns micro-averaged label-based f1-measure. + (equals to micro-averaged document-based f1-measure) + """ + return self.call("microF1Measure") + + @property + def hammingLoss(self): + """ + Returns Hamming-loss. 
+ """ + return self.call("hammingLoss") + + @property + def subsetAccuracy(self): + """ + Returns subset accuracy. + (for equal sets of labels) + """ + return self.call("subsetAccuracy") + + @property + def accuracy(self): + """ + Returns accuracy. + """ + return self.call("accuracy") + + def _test(): import doctest from pyspark import SparkContext From b631bf73b9f288f37c98b806be430b22485880e5 Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Wed, 20 May 2015 11:23:40 -0700 Subject: [PATCH 275/320] [SPARK-7713] [SQL] Use shared broadcast hadoop conf for partitioned table scan. https://issues.apache.org/jira/browse/SPARK-7713 I tested the performance with the following code: ```scala import sqlContext._ import sqlContext.implicits._ (1 to 5000).foreach { i => val df = (1 to 1000).map(j => (j, s"str$j")).toDF("a", "b").save(s"/tmp/partitioned/i=$i") } sqlContext.sql(""" CREATE TEMPORARY TABLE partitionedParquet USING org.apache.spark.sql.parquet OPTIONS ( path '/tmp/partitioned' )""") table("partitionedParquet").explain(true) ``` In our master `explain` takes 40s in my laptop. With this PR, `explain` takes 14s. Author: Yin Huai <yhuai@databricks.com> Closes #6252 from yhuai/broadcastHadoopConf and squashes the following commits: 6fa73df [Yin Huai] Address comments of Josh and Andrew. 807fbf9 [Yin Huai] Make the new buildScan and SqlNewHadoopRDD private sql. e393555 [Yin Huai] Cheng's comments. 2eb53bb [Yin Huai] Use a shared broadcast Hadoop Configuration for partitioned HadoopFsRelations. --- .../apache/spark/sql/parquet/newParquet.scala | 113 +++++--- .../sql/sources/DataSourceStrategy.scala | 19 +- .../spark/sql/sources/SqlNewHadoopRDD.scala | 268 ++++++++++++++++++ .../apache/spark/sql/sources/interfaces.scala | 35 ++- 4 files changed, 387 insertions(+), 48 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 7ca44f7b81a2d..c35b7eff82af5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -23,6 +23,7 @@ import scala.collection.JavaConversions._ import scala.util.Try import com.google.common.base.Objects +import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce._ @@ -32,13 +33,14 @@ import parquet.hadoop._ import parquet.hadoop.metadata.CompressionCodecName import parquet.hadoop.util.ContextUtil +import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD._ -import org.apache.spark.rdd.{NewHadoopPartition, NewHadoopRDD, RDD} +import org.apache.spark.rdd.RDD import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.{Row, SQLConf, SQLContext} -import org.apache.spark.{Logging, Partition => SparkPartition, SparkException} +import org.apache.spark.{Partition => SparkPartition, SparkEnv, SerializableWritable, Logging, SparkException} private[sql] class DefaultSource extends HadoopFsRelationProvider { override def createRelation( @@ -233,40 +235,20 @@ private[sql] class ParquetRelation2( override def buildScan( requiredColumns: Array[String], filters: Array[Filter], - inputFiles: Array[FileStatus]): RDD[Row] = { - - val job = 
new Job(SparkHadoopUtil.get.conf) - val conf = ContextUtil.getConfiguration(job) - - ParquetInputFormat.setReadSupportClass(job, classOf[RowReadSupport]) - - if (inputFiles.nonEmpty) { - FileInputFormat.setInputPaths(job, inputFiles.map(_.getPath): _*) - } - - // Try to push down filters when filter push-down is enabled. - if (sqlContext.conf.parquetFilterPushDown) { - filters - // Collects all converted Parquet filter predicates. Notice that not all predicates can be - // converted (`ParquetFilters.createFilter` returns an `Option`). That's why a `flatMap` - // is used here. - .flatMap(ParquetFilters.createFilter(dataSchema, _)) - .reduceOption(FilterApi.and) - .foreach(ParquetInputFormat.setFilterPredicate(conf, _)) - } - - conf.set(RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA, { - val requestedSchema = StructType(requiredColumns.map(dataSchema(_))) - ParquetTypesConverter.convertToString(requestedSchema.toAttributes) - }) - - conf.set( - RowWriteSupport.SPARK_ROW_SCHEMA, - ParquetTypesConverter.convertToString(dataSchema.toAttributes)) - - // Tell FilteringParquetRowInputFormat whether it's okay to cache Parquet and FS metadata + inputFiles: Array[FileStatus], + broadcastedConf: Broadcast[SerializableWritable[Configuration]]): RDD[Row] = { val useMetadataCache = sqlContext.getConf(SQLConf.PARQUET_CACHE_METADATA, "true").toBoolean - conf.set(SQLConf.PARQUET_CACHE_METADATA, useMetadataCache.toString) + val parquetFilterPushDown = sqlContext.conf.parquetFilterPushDown + // Create the function to set variable Parquet confs at both driver and executor side. + val initLocalJobFuncOpt = + ParquetRelation2.initializeLocalJobFunc( + requiredColumns, + filters, + dataSchema, + useMetadataCache, + parquetFilterPushDown) _ + // Create the function to set input paths at the driver side. + val setInputPaths = ParquetRelation2.initializeDriverSideJobFunc(inputFiles) _ val footers = inputFiles.map(f => metadataCache.footers(f.getPath)) @@ -274,12 +256,14 @@ private[sql] class ParquetRelation2( // After upgrading to Parquet 1.6.0, we should be able to stop caching `FileStatus` objects and // footers. Especially when a global arbitrative schema (either from metastore or data source // DDL) is available. - new NewHadoopRDD( - sqlContext.sparkContext, - classOf[FilteringParquetRowInputFormat], - classOf[Void], - classOf[Row], - conf) { + new SqlNewHadoopRDD( + sc = sqlContext.sparkContext, + broadcastedConf = broadcastedConf, + initDriverSideJobFuncOpt = Some(setInputPaths), + initLocalJobFuncOpt = Some(initLocalJobFuncOpt), + inputFormatClass = classOf[FilteringParquetRowInputFormat], + keyClass = classOf[Void], + valueClass = classOf[Row]) { val cacheMetadata = useMetadataCache @@ -311,11 +295,11 @@ private[sql] class ParquetRelation2( new FilteringParquetRowInputFormat } - val jobContext = newJobContext(getConf, jobId) + val jobContext = newJobContext(getConf(isDriverSide = true), jobId) val rawSplits = inputFormat.getSplits(jobContext) Array.tabulate[SparkPartition](rawSplits.size) { i => - new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) + new SqlNewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) } } }.values @@ -452,6 +436,49 @@ private[sql] object ParquetRelation2 extends Logging { // internally. private[sql] val METASTORE_SCHEMA = "metastoreSchema" + /** This closure sets various Parquet configurations at both driver side and executor side. 
+   */
+  private[parquet] def initializeLocalJobFunc(
+      requiredColumns: Array[String],
+      filters: Array[Filter],
+      dataSchema: StructType,
+      useMetadataCache: Boolean,
+      parquetFilterPushDown: Boolean)(job: Job): Unit = {
+    val conf = job.getConfiguration
+    conf.set(ParquetInputFormat.READ_SUPPORT_CLASS, classOf[RowReadSupport].getName())
+
+    // Try to push down filters when filter push-down is enabled.
+    if (parquetFilterPushDown) {
+      filters
+        // Collects all converted Parquet filter predicates. Notice that not all predicates can be
+        // converted (`ParquetFilters.createFilter` returns an `Option`). That's why a `flatMap`
+        // is used here.
+        .flatMap(ParquetFilters.createFilter(dataSchema, _))
+        .reduceOption(FilterApi.and)
+        .foreach(ParquetInputFormat.setFilterPredicate(conf, _))
+    }
+
+    conf.set(RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA, {
+      val requestedSchema = StructType(requiredColumns.map(dataSchema(_)))
+      ParquetTypesConverter.convertToString(requestedSchema.toAttributes)
+    })
+
+    conf.set(
+      RowWriteSupport.SPARK_ROW_SCHEMA,
+      ParquetTypesConverter.convertToString(dataSchema.toAttributes))
+
+    // Tell FilteringParquetRowInputFormat whether it's okay to cache Parquet and FS metadata
+    conf.set(SQLConf.PARQUET_CACHE_METADATA, useMetadataCache.toString)
+  }
+
+  /** This closure sets input paths at the driver side. */
+  private[parquet] def initializeDriverSideJobFunc(
+      inputFiles: Array[FileStatus])(job: Job): Unit = {
+    // We set the input paths at the driver side.
+    if (inputFiles.nonEmpty) {
+      FileInputFormat.setInputPaths(job, inputFiles.map(_.getPath): _*)
+    }
+  }
+
   private[parquet] def readSchema(
       footers: Seq[Footer], sqlContext: SQLContext): Option[StructType] = {
     footers.map { footer =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala
index 1615a6dcbdb2a..550090d22d551 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala
@@ -17,7 +17,8 @@
 
 package org.apache.spark.sql.sources
 
-import org.apache.spark.Logging
+import org.apache.spark.{SerializableWritable, Logging}
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.rdd.{RDD, UnionRDD}
 import org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.expressions._
@@ -84,11 +85,16 @@ private[sql] object DataSourceStrategy extends Strategy with Logging {
 
     // Scanning non-partitioned HadoopFsRelation
     case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: HadoopFsRelation)) =>
+      // See buildPartitionedTableScan for the reason that we need to create a shared
+      // broadcast HadoopConf.
+      val sharedHadoopConf = SparkHadoopUtil.get.conf
+      val confBroadcast =
+        t.sqlContext.sparkContext.broadcast(new SerializableWritable(sharedHadoopConf))
       pruneFilterProject(
         l,
         projectList,
         filters,
-        (a, f) => t.buildScan(a, f, t.paths)) :: Nil
+        (a, f) => t.buildScan(a, f, t.paths, confBroadcast)) :: Nil
 
     case l @ LogicalRelation(t: TableScan) =>
       createPhysicalRDD(l.relation, l.output, t.buildScan()) :: Nil
@@ -115,6 +121,12 @@ private[sql] object DataSourceStrategy extends Strategy with Logging {
     val output = projections.map(_.toAttribute)
     val relation = logicalRelation.relation.asInstanceOf[HadoopFsRelation]
 
+    // Because we are creating one RDD per partition, we need to have a shared HadoopConf.
+ // Otherwise, the cost of broadcasting HadoopConf in every RDD will be high. + val sharedHadoopConf = SparkHadoopUtil.get.conf + val confBroadcast = + relation.sqlContext.sparkContext.broadcast(new SerializableWritable(sharedHadoopConf)) + // Builds RDD[Row]s for each selected partition. val perPartitionRows = partitions.map { case Partition(partitionValues, dir) => // The table scan operator (PhysicalRDD) which retrieves required columns from data files. @@ -132,7 +144,8 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { // assuming partition columns data stored in data files are always consistent with those // partition values encoded in partition directory paths. val nonPartitionColumns = requiredColumns.filterNot(partitionColNames.contains) - val dataRows = relation.buildScan(nonPartitionColumns, filters, Array(dir)) + val dataRows = + relation.buildScan(nonPartitionColumns, filters, Array(dir), confBroadcast) // Merges data values with partition values. mergeWithPartitionValues( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala new file mode 100644 index 0000000000000..0c7bb6e50cd98 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import java.text.SimpleDateFormat +import java.util.Date + +import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.io.Writable +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit} +import org.apache.spark.broadcast.Broadcast + +import org.apache.spark.{Partition => SparkPartition, _} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.executor.DataReadMethod +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil +import org.apache.spark.rdd.{RDD, HadoopRDD} +import org.apache.spark.rdd.NewHadoopRDD.NewHadoopMapPartitionsWithSplitRDD +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils + +import scala.reflect.ClassTag + +private[spark] class SqlNewHadoopPartition( + rddId: Int, + val index: Int, + @transient rawSplit: InputSplit with Writable) + extends SparkPartition { + + val serializableHadoopSplit = new SerializableWritable(rawSplit) + + override def hashCode(): Int = 41 * (41 + rddId) + index +} + +/** + * An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS, + * sources in HBase, or S3), using the new MapReduce API (`org.apache.hadoop.mapreduce`). 
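+ *
+ * A rough usage sketch (it mirrors how `newParquet.scala` in this patch wires the RDD up;
+ * `setInputPaths` and `initLocalJobFuncOpt` stand for the caller-supplied job-setup closures):
+ * {{{
+ *   val confBroadcast = sqlContext.sparkContext.broadcast(
+ *     new SerializableWritable(SparkHadoopUtil.get.conf))
+ *   val rows = new SqlNewHadoopRDD(
+ *     sc = sqlContext.sparkContext,
+ *     broadcastedConf = confBroadcast,
+ *     initDriverSideJobFuncOpt = Some(setInputPaths),
+ *     initLocalJobFuncOpt = Some(initLocalJobFuncOpt),
+ *     inputFormatClass = classOf[FilteringParquetRowInputFormat],
+ *     keyClass = classOf[Void],
+ *     valueClass = classOf[Row])
+ * }}}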
+ * It is based on [[org.apache.spark.rdd.NewHadoopRDD]]. It has three additions:
+ * 1. A shared broadcast Hadoop Configuration.
+ * 2. An optional closure `initDriverSideJobFuncOpt` that sets configurations at the driver side
+ *    to the shared Hadoop Configuration.
+ * 3. An optional closure `initLocalJobFuncOpt` that sets configurations at both the driver side
+ *    and the executor side to the shared Hadoop Configuration.
+ *
+ * Note: This RDD is basically a cloned version of [[org.apache.spark.rdd.NewHadoopRDD]] with
+ * changes based on [[org.apache.spark.rdd.HadoopRDD]]. In the future, this functionality will be
+ * folded into core.
+ */
+private[sql] class SqlNewHadoopRDD[K, V](
+    @transient sc : SparkContext,
+    broadcastedConf: Broadcast[SerializableWritable[Configuration]],
+    @transient initDriverSideJobFuncOpt: Option[Job => Unit],
+    initLocalJobFuncOpt: Option[Job => Unit],
+    inputFormatClass: Class[_ <: InputFormat[K, V]],
+    keyClass: Class[K],
+    valueClass: Class[V])
+  extends RDD[(K, V)](sc, Nil)
+  with SparkHadoopMapReduceUtil
+  with Logging {
+
+  if (initLocalJobFuncOpt.isDefined) {
+    sc.clean(initLocalJobFuncOpt.get)
+  }
+
+  protected def getJob(): Job = {
+    val conf: Configuration = broadcastedConf.value.value
+    // "new Job" will make a copy of the conf. Then, it is
+    // safe to mutate conf properties with initLocalJobFuncOpt
+    // and initDriverSideJobFuncOpt.
+    val newJob = new Job(conf)
+    initLocalJobFuncOpt.map(f => f(newJob))
+    newJob
+  }
+
+  def getConf(isDriverSide: Boolean): Configuration = {
+    val job = getJob()
+    if (isDriverSide) {
+      initDriverSideJobFuncOpt.map(f => f(job))
+    }
+    job.getConfiguration
+  }
+
+  private val jobTrackerId: String = {
+    val formatter = new SimpleDateFormat("yyyyMMddHHmm")
+    formatter.format(new Date())
+  }
+
+  @transient protected val jobId = new JobID(jobTrackerId, id)
+
+  override def getPartitions: Array[SparkPartition] = {
+    val conf = getConf(isDriverSide = true)
+    val inputFormat = inputFormatClass.newInstance
+    inputFormat match {
+      case configurable: Configurable =>
+        configurable.setConf(conf)
+      case _ =>
+    }
+    val jobContext = newJobContext(conf, jobId)
+    val rawSplits = inputFormat.getSplits(jobContext).toArray
+    val result = new Array[SparkPartition](rawSplits.size)
+    for (i <- 0 until rawSplits.size) {
+      result(i) =
+        new SqlNewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable])
+    }
+    result
+  }
+
+  override def compute(
+      theSplit: SparkPartition,
+      context: TaskContext): InterruptibleIterator[(K, V)] = {
+    val iter = new Iterator[(K, V)] {
+      val split = theSplit.asInstanceOf[SqlNewHadoopPartition]
+      logInfo("Input split: " + split.serializableHadoopSplit)
+      val conf = getConf(isDriverSide = false)
+
+      val inputMetrics = context.taskMetrics
+        .getInputMetricsForReadMethod(DataReadMethod.Hadoop)
+
+      // Find a function that will return the FileSystem bytes read by this thread.
Do this before + // creating RecordReader, because RecordReader's constructor might read some bytes + val bytesReadCallback = inputMetrics.bytesReadCallback.orElse { + split.serializableHadoopSplit.value match { + case _: FileSplit | _: CombineFileSplit => + SparkHadoopUtil.get.getFSBytesReadOnThreadCallback() + case _ => None + } + } + inputMetrics.setBytesReadCallback(bytesReadCallback) + + val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) + val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) + val format = inputFormatClass.newInstance + format match { + case configurable: Configurable => + configurable.setConf(conf) + case _ => + } + val reader = format.createRecordReader( + split.serializableHadoopSplit.value, hadoopAttemptContext) + reader.initialize(split.serializableHadoopSplit.value, hadoopAttemptContext) + + // Register an on-task-completion callback to close the input stream. + context.addTaskCompletionListener(context => close()) + var havePair = false + var finished = false + var recordsSinceMetricsUpdate = 0 + + override def hasNext: Boolean = { + if (!finished && !havePair) { + finished = !reader.nextKeyValue + havePair = !finished + } + !finished + } + + override def next(): (K, V) = { + if (!hasNext) { + throw new java.util.NoSuchElementException("End of stream") + } + havePair = false + if (!finished) { + inputMetrics.incRecordsRead(1) + } + (reader.getCurrentKey, reader.getCurrentValue) + } + + private def close() { + try { + reader.close() + if (bytesReadCallback.isDefined) { + inputMetrics.updateBytesRead() + } else if (split.serializableHadoopSplit.value.isInstanceOf[FileSplit] || + split.serializableHadoopSplit.value.isInstanceOf[CombineFileSplit]) { + // If we can't get the bytes read from the FS stats, fall back to the split size, + // which may be inaccurate. + try { + inputMetrics.incBytesRead(split.serializableHadoopSplit.value.getLength) + } catch { + case e: java.io.IOException => + logWarning("Unable to get input size to set InputMetrics for task", e) + } + } + } catch { + case e: Exception => { + if (!Utils.inShutdown()) { + logWarning("Exception in RecordReader.close()", e) + } + } + } + } + } + new InterruptibleIterator(context, iter) + } + + /** Maps over a partition, providing the InputSplit that was used as the base of the partition. */ + @DeveloperApi + def mapPartitionsWithInputSplit[U: ClassTag]( + f: (InputSplit, Iterator[(K, V)]) => Iterator[U], + preservesPartitioning: Boolean = false): RDD[U] = { + new NewHadoopMapPartitionsWithSplitRDD(this, f, preservesPartitioning) + } + + override def getPreferredLocations(hsplit: SparkPartition): Seq[String] = { + val split = hsplit.asInstanceOf[SqlNewHadoopPartition].serializableHadoopSplit.value + val locs = HadoopRDD.SPLIT_INFO_REFLECTIONS match { + case Some(c) => + try { + val infos = c.newGetLocationInfo.invoke(split).asInstanceOf[Array[AnyRef]] + Some(HadoopRDD.convertSplitLocationInfo(infos)) + } catch { + case e : Exception => + logDebug("Failed to use InputSplit#getLocationInfo.", e) + None + } + case None => None + } + locs.getOrElse(split.getLocations.filter(_ != "localhost")) + } + + override def persist(storageLevel: StorageLevel): this.type = { + if (storageLevel.deserialized) { + logWarning("Caching NewHadoopRDDs as deserialized objects usually leads to undesired" + + " behavior because Hadoop's RecordReader reuses the same Writable object for all records." 
+ + " Use a map transformation to make copies of the records.") + } + super.persist(storageLevel) + } +} + +private[spark] object SqlNewHadoopRDD { + /** + * Analogous to [[org.apache.spark.rdd.MapPartitionsRDD]], but passes in an InputSplit to + * the given function rather than the index of the partition. + */ + private[spark] class NewHadoopMapPartitionsWithSplitRDD[U: ClassTag, T: ClassTag]( + prev: RDD[T], + f: (InputSplit, Iterator[T]) => Iterator[U], + preservesPartitioning: Boolean = false) + extends RDD[U](prev) { + + override val partitioner = if (preservesPartitioning) firstParent[T].partitioner else None + + override def getPartitions: Array[SparkPartition] = firstParent[T].partitions + + override def compute(split: SparkPartition, context: TaskContext): Iterator[U] = { + val partition = split.asInstanceOf[SqlNewHadoopPartition] + val inputSplit = partition.serializableHadoopSplit.value + f(inputSplit, firstParent[T].iterator(split, context)) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 9b52d1be3df2d..6a917bf38b139 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -25,7 +25,9 @@ import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD +import org.apache.spark.SerializableWritable import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection @@ -484,7 +486,8 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio private[sources] final def buildScan( requiredColumns: Array[String], filters: Array[Filter], - inputPaths: Array[String]): RDD[Row] = { + inputPaths: Array[String], + broadcastedConf: Broadcast[SerializableWritable[Configuration]]): RDD[Row] = { val inputStatuses = inputPaths.flatMap { input => val path = new Path(input) @@ -499,7 +502,7 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio } } - buildScan(requiredColumns, filters, inputStatuses) + buildScan(requiredColumns, filters, inputStatuses, broadcastedConf) } /** @@ -583,6 +586,34 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio buildScan(requiredColumns, inputFiles) } + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * Note: This interface is subject to change in future. + * + * @param requiredColumns Required columns. + * @param filters Candidate filters to be pushed down. The actual filter should be the conjunction + * of all `filters`. The pushed down filters are currently purely an optimization as they + * will all be evaluated again. This means it is safe to use them with methods that produce + * false positives such as filtering partitions based on a bloom filter. + * @param inputFiles For a non-partitioned relation, it contains paths of all data files in the + * relation. 
For a partitioned relation, it contains paths of all data files in a single + * selected partition. + * @param broadcastedConf A shared broadcast Hadoop Configuration, which can be used to reduce the + * overhead of broadcasting the Configuration for every Hadoop RDD. + * + * @since 1.4.0 + */ + private[sql] def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputFiles: Array[FileStatus], + broadcastedConf: Broadcast[SerializableWritable[Configuration]]): RDD[Row] = { + buildScan(requiredColumns, filters, inputFiles) + } + /** * Prepares a write job and returns an [[OutputWriterFactory]]. Client side job preparation can * be put here. For example, user defined output committer can be configured here From 2ad4837cfa66fcedc96b0819a8c2f4c3d70b0aaa Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Wed, 20 May 2015 12:50:06 -0700 Subject: [PATCH 276/320] [SPARK-7537] [MLLIB] spark.mllib API updates Minor updates to the spark.mllib APIs: 1. Add `DeveloperApi` to `PMMLExportable` and add `Experimental` to `toPMML` methods. 2. Mention `RankingMetrics.of` in the `RankingMetrics` constructor. Author: Xiangrui Meng <meng@databricks.com> Closes #6280 from mengxr/SPARK-7537 and squashes the following commits: 1bd2583 [Xiangrui Meng] organize imports 94afa7a [Xiangrui Meng] mark all toPMML methods experimental 4c40da1 [Xiangrui Meng] mention the factory method for RankingMetrics for Java users 88c62d0 [Xiangrui Meng] add DeveloperApi to PMMLExportable --- .../spark/mllib/evaluation/RankingMetrics.scala | 2 ++ .../org/apache/spark/mllib/pmml/PMMLExportable.scala | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index b9b54b93c27fa..5b5a2a1450f7f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -31,6 +31,8 @@ import org.apache.spark.rdd.RDD * ::Experimental:: * Evaluator for ranking algorithms. * + * Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance. + * * @param predictionAndLabels an RDD of (predicted ranking, ground truth set) pairs. */ @Experimental diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala index 354e90f3eeaa6..5e882d4ebb10b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala @@ -23,13 +23,16 @@ import javax.xml.transform.stream.StreamResult import org.jpmml.model.JAXBUtil import org.apache.spark.SparkContext +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory /** + * :: DeveloperApi :: * Export model to the PMML format * Predictive Model Markup Language (PMML) is an XML-based file format * developed by the Data Mining Group (www.dmg.org). 
*/ +@DeveloperApi trait PMMLExportable { /** @@ -41,30 +44,38 @@ trait PMMLExportable { } /** + * :: Experimental :: * Export the model to a local file in PMML format */ + @Experimental def toPMML(localPath: String): Unit = { toPMML(new StreamResult(new File(localPath))) } /** + * :: Experimental :: * Export the model to a directory on a distributed file system in PMML format */ + @Experimental def toPMML(sc: SparkContext, path: String): Unit = { val pmml = toPMML() sc.parallelize(Array(pmml), 1).saveAsTextFile(path) } /** + * :: Experimental :: * Export the model to the OutputStream in PMML format */ + @Experimental def toPMML(outputStream: OutputStream): Unit = { toPMML(new StreamResult(outputStream)) } /** + * :: Experimental :: * Export the model to a String in PMML format */ + @Experimental def toPMML(): String = { val writer = new StringWriter toPMML(new StreamResult(writer)) From 829f1d95bac9153e7b646fbc0d55566ecf896200 Mon Sep 17 00:00:00 2001 From: Sandy Ryza <sandy@cloudera.com> Date: Wed, 20 May 2015 13:10:30 -0700 Subject: [PATCH 277/320] [SPARK-7579] [ML] [DOC] User guide update for OneHotEncoder Author: Sandy Ryza <sandy@cloudera.com> Closes #6126 from sryza/sandy-spark-7579 and squashes the following commits: 5af803d [Sandy Ryza] SPARK-7579 [MLLIB] User guide update for OneHotEncoder --- docs/ml-features.md | 95 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/docs/ml-features.md b/docs/ml-features.md index 63ea3e5db7ac9..235029d71fadd 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -440,5 +440,100 @@ for expanded in polyDF.select("polyFeatures").take(3): </div> </div> +## OneHotEncoder + +[One-hot encoding](http://en.wikipedia.org/wiki/One-hot) maps a column of label indices to a column of binary vectors, with at most a single one-value. This encoding allows algorithms which expect continuous features, such as Logistic Regression, to use categorical features + +<div class="codetabs"> +<div data-lang="scala" markdown="1"> +{% highlight scala %} +import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer} + +val df = sqlContext.createDataFrame(Seq( + (0, "a"), + (1, "b"), + (2, "c"), + (3, "a"), + (4, "a"), + (5, "c") +)).toDF("id", "category") + +val indexer = new StringIndexer() + .setInputCol("category") + .setOutputCol("categoryIndex") + .fit(df) +val indexed = indexer.transform(df) + +val encoder = new OneHotEncoder().setInputCol("categoryIndex"). 
+ setOutputCol("categoryVec") +val encoded = encoder.transform(indexed) +encoded.select("id", "categoryVec").foreach(println) +{% endhighlight %} +</div> + +<div data-lang="java" markdown="1"> +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.OneHotEncoder; +import org.apache.spark.ml.feature.StringIndexer; +import org.apache.spark.ml.feature.StringIndexerModel; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(0, "a"), + RowFactory.create(1, "b"), + RowFactory.create(2, "c"), + RowFactory.create(3, "a"), + RowFactory.create(4, "a"), + RowFactory.create(5, "c") +)); +StructType schema = new StructType(new StructField[]{ + new StructField("id", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("category", DataTypes.StringType, false, Metadata.empty()) +}); +DataFrame df = sqlContext.createDataFrame(jrdd, schema); +StringIndexerModel indexer = new StringIndexer() + .setInputCol("category") + .setOutputCol("categoryIndex") + .fit(df); +DataFrame indexed = indexer.transform(df); + +OneHotEncoder encoder = new OneHotEncoder() + .setInputCol("categoryIndex") + .setOutputCol("categoryVec"); +DataFrame encoded = encoder.transform(indexed); +{% endhighlight %} +</div> + +<div data-lang="python" markdown="1"> +{% highlight python %} +from pyspark.ml.feature import OneHotEncoder, StringIndexer + +df = sqlContext.createDataFrame([ + (0, "a"), + (1, "b"), + (2, "c"), + (3, "a"), + (4, "a"), + (5, "c") +], ["id", "category"]) + +stringIndexer = StringIndexer(inputCol="category", outputCol="categoryIndex") +model = stringIndexer.fit(df) +indexed = model.transform(df) +encoder = OneHotEncoder(includeFirst=False, inputCol="categoryIndex", outputCol="categoryVec") +encoded = encoder.transform(indexed) +{% endhighlight %} +</div> +</div> + # Feature Selectors From 6338c40da61de045485c51aa11a5b1e425d22144 Mon Sep 17 00:00:00 2001 From: Patrick Wendell <patrick@databricks.com> Date: Wed, 20 May 2015 13:39:04 -0700 Subject: [PATCH 278/320] Revert "[SPARK-7320] [SQL] Add Cube / Rollup for dataframe" This reverts commit 10698e1131f665addb454cd498669920699a91b2. --- .../org/apache/spark/sql/DataFrame.scala | 104 +----------------- .../org/apache/spark/sql/GroupedData.scala | 92 +++++----------- .../hive/HiveDataFrameAnalyticsSuite.scala | 62 ----------- 3 files changed, 28 insertions(+), 230 deletions(-) delete mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index d78b4c2f8909c..adad85806d1ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -685,53 +685,7 @@ class DataFrame private[sql]( * @since 1.3.0 */ @scala.annotation.varargs - def groupBy(cols: Column*): GroupedData = { - GroupedData(this, cols.map(_.expr), GroupedData.GroupByType) - } - - /** - * Create a multi-dimensional rollup for the current [[DataFrame]] using the specified columns, - * so we can run aggregation on them. 
- * See [[GroupedData]] for all the available aggregate functions. - * - * {{{ - * // Compute the average for all numeric columns rolluped by department and group. - * df.rollup($"department", $"group").avg() - * - * // Compute the max age and average salary, rolluped by department and gender. - * df.rollup($"department", $"gender").agg(Map( - * "salary" -> "avg", - * "age" -> "max" - * )) - * }}} - * @group dfops - * @since 1.4.0 - */ - @scala.annotation.varargs - def rollup(cols: Column*): GroupedData = { - GroupedData(this, cols.map(_.expr), GroupedData.RollupType) - } - - /** - * Create a multi-dimensional cube for the current [[DataFrame]] using the specified columns, - * so we can run aggregation on them. - * See [[GroupedData]] for all the available aggregate functions. - * - * {{{ - * // Compute the average for all numeric columns cubed by department and group. - * df.cube($"department", $"group").avg() - * - * // Compute the max age and average salary, cubed by department and gender. - * df.cube($"department", $"gender").agg(Map( - * "salary" -> "avg", - * "age" -> "max" - * )) - * }}} - * @group dfops - * @since 1.4.0 - */ - @scala.annotation.varargs - def cube(cols: Column*): GroupedData = GroupedData(this, cols.map(_.expr), GroupedData.CubeType) + def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr)) /** * Groups the [[DataFrame]] using the specified columns, so we can run aggregation on them. @@ -756,61 +710,7 @@ class DataFrame private[sql]( @scala.annotation.varargs def groupBy(col1: String, cols: String*): GroupedData = { val colNames: Seq[String] = col1 +: cols - GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.GroupByType) - } - - /** - * Create a multi-dimensional rollup for the current [[DataFrame]] using the specified columns, - * so we can run aggregation on them. - * See [[GroupedData]] for all the available aggregate functions. - * - * This is a variant of rollup that can only group by existing columns using column names - * (i.e. cannot construct expressions). - * - * {{{ - * // Compute the average for all numeric columns rolluped by department and group. - * df.rollup("department", "group").avg() - * - * // Compute the max age and average salary, rolluped by department and gender. - * df.rollup($"department", $"gender").agg(Map( - * "salary" -> "avg", - * "age" -> "max" - * )) - * }}} - * @group dfops - * @since 1.4.0 - */ - @scala.annotation.varargs - def rollup(col1: String, cols: String*): GroupedData = { - val colNames: Seq[String] = col1 +: cols - GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.RollupType) - } - - /** - * Create a multi-dimensional cube for the current [[DataFrame]] using the specified columns, - * so we can run aggregation on them. - * See [[GroupedData]] for all the available aggregate functions. - * - * This is a variant of cube that can only group by existing columns using column names - * (i.e. cannot construct expressions). - * - * {{{ - * // Compute the average for all numeric columns cubed by department and group. - * df.cube("department", "group").avg() - * - * // Compute the max age and average salary, cubed by department and gender. 
- * df.cube($"department", $"gender").agg(Map( - * "salary" -> "avg", - * "age" -> "max" - * )) - * }}} - * @group dfops - * @since 1.4.0 - */ - @scala.annotation.varargs - def cube(col1: String, cols: String*): GroupedData = { - val colNames: Seq[String] = col1 +: cols - GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.CubeType) + new GroupedData(this, colNames.map(colName => resolve(colName))) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index f730e4ae00e2b..1381b9f1a6080 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -23,40 +23,9 @@ import scala.language.implicitConversions import org.apache.spark.annotation.Experimental import org.apache.spark.sql.catalyst.analysis.Star import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.{Rollup, Cube, Aggregate} +import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.types.NumericType -/** - * Companion object for GroupedData - */ -private[sql] object GroupedData { - def apply( - df: DataFrame, - groupingExprs: Seq[Expression], - groupType: GroupType): GroupedData = { - new GroupedData(df, groupingExprs, groupType: GroupType) - } - - /** - * The Grouping Type - */ - trait GroupType - - /** - * To indicate it's the GroupBy - */ - object GroupByType extends GroupType - - /** - * To indicate it's the CUBE - */ - object CubeType extends GroupType - - /** - * To indicate it's the ROLLUP - */ - object RollupType extends GroupType -} /** * :: Experimental :: @@ -65,37 +34,19 @@ private[sql] object GroupedData { * @since 1.3.0 */ @Experimental -class GroupedData protected[sql]( - df: DataFrame, - groupingExprs: Seq[Expression], - private val groupType: GroupedData.GroupType) { +class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) { - private[this] def toDF(aggExprs: Seq[NamedExpression]): DataFrame = { - val aggregates = if (df.sqlContext.conf.dataFrameRetainGroupColumns) { - val retainedExprs = groupingExprs.map { - case expr: NamedExpression => expr - case expr: Expression => Alias(expr, expr.prettyString)() - } - retainedExprs ++ aggExprs - } else { - aggExprs - } - - groupType match { - case GroupedData.GroupByType => - DataFrame( - df.sqlContext, Aggregate(groupingExprs, aggregates, df.logicalPlan)) - case GroupedData.RollupType => - DataFrame( - df.sqlContext, Rollup(groupingExprs, df.logicalPlan, aggregates)) - case GroupedData.CubeType => - DataFrame( - df.sqlContext, Cube(groupingExprs, df.logicalPlan, aggregates)) + private[sql] implicit def toDF(aggExprs: Seq[NamedExpression]): DataFrame = { + val namedGroupingExprs = groupingExprs.map { + case expr: NamedExpression => expr + case expr: Expression => Alias(expr, expr.prettyString)() } + DataFrame( + df.sqlContext, Aggregate(groupingExprs, namedGroupingExprs ++ aggExprs, df.logicalPlan)) } private[this] def aggregateNumericColumns(colNames: String*)(f: Expression => Expression) - : DataFrame = { + : Seq[NamedExpression] = { val columnExprs = if (colNames.isEmpty) { // No columns specified. Use all numeric columns. 
@@ -112,10 +63,10 @@ class GroupedData protected[sql]( namedExpr } } - toDF(columnExprs.map { c => + columnExprs.map { c => val a = f(c) Alias(a, a.prettyString)() - }) + } } private[this] def strToExpr(expr: String): (Expression => Expression) = { @@ -168,10 +119,10 @@ class GroupedData protected[sql]( * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = { - toDF(exprs.map { case (colName, expr) => + exprs.map { case (colName, expr) => val a = strToExpr(expr)(df(colName).expr) Alias(a, a.prettyString)() - }.toSeq) + }.toSeq } /** @@ -224,10 +175,19 @@ class GroupedData protected[sql]( */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = { - toDF((expr +: exprs).map(_.expr).map { + val aggExprs = (expr +: exprs).map(_.expr).map { case expr: NamedExpression => expr case expr: Expression => Alias(expr, expr.prettyString)() - }) + } + if (df.sqlContext.conf.dataFrameRetainGroupColumns) { + val retainedExprs = groupingExprs.map { + case expr: NamedExpression => expr + case expr: Expression => Alias(expr, expr.prettyString)() + } + DataFrame(df.sqlContext, Aggregate(groupingExprs, retainedExprs ++ aggExprs, df.logicalPlan)) + } else { + DataFrame(df.sqlContext, Aggregate(groupingExprs, aggExprs, df.logicalPlan)) + } } /** @@ -236,7 +196,7 @@ class GroupedData protected[sql]( * * @since 1.3.0 */ - def count(): DataFrame = toDF(Seq(Alias(Count(Literal(1)), "count")())) + def count(): DataFrame = Seq(Alias(Count(Literal(1)), "count")()) /** * Compute the average value for each numeric columns for each group. This is an alias for `avg`. @@ -296,5 +256,5 @@ class GroupedData protected[sql]( @scala.annotation.varargs def sum(colNames: String*): DataFrame = { aggregateNumericColumns(colNames:_*)(Sum) - } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala deleted file mode 100644 index 3ad05f482504c..0000000000000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive - -import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.hive.test.TestHive -import org.apache.spark.sql.hive.test.TestHive._ -import org.apache.spark.sql.hive.test.TestHive.implicits._ - -case class TestData2Int(a: Int, b: Int) - -// TODO ideally we should put the test suite into the package `sql`, as -// `hive` package is optional in compiling, however, `SQLContext.sql` doesn't -// support the `cube` or `rollup` yet. 
-class HiveDataFrameAnalyticsSuite extends QueryTest { - val testData = - TestHive.sparkContext.parallelize( - TestData2Int(1, 2) :: - TestData2Int(2, 4) :: Nil).toDF() - - testData.registerTempTable("mytable") - - test("rollup") { - checkAnswer( - testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")), - sql("select a + b, b, sum(a - b) from mytable group by a + b, b with rollup").collect() - ) - - checkAnswer( - testData.rollup("a", "b").agg(sum("b")), - sql("select a, b, sum(b) from mytable group by a, b with rollup").collect() - ) - } - - test("cube") { - checkAnswer( - testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")), - sql("select a + b, b, sum(a - b) from mytable group by a + b, b with cube").collect() - ) - - checkAnswer( - testData.cube("a", "b").agg(sum("b")), - sql("select a, b, sum(b) from mytable group by a, b with cube").collect() - ) - } -} From 191ee474527530246ac3164ae9631e01bdd1e647 Mon Sep 17 00:00:00 2001 From: Holden Karau <holden@pigscanfly.ca> Date: Wed, 20 May 2015 15:16:12 -0700 Subject: [PATCH 279/320] [SPARK-7511] [MLLIB] pyspark ml seed param should be random by default or 42 is quite funny but not very random Author: Holden Karau <holden@pigscanfly.ca> Closes #6139 from holdenk/SPARK-7511-pyspark-ml-seed-param-should-be-random-by-default-or-42-is-quite-funny-but-not-very-random and squashes the following commits: 591f8e5 [Holden Karau] specify old seed for doc tests 2470004 [Holden Karau] Fix a bunch of seeds with default values to have None as the default which will then result in using the hash of the class name cbad96d [Holden Karau] Add the setParams function that is used in the real code 423b8d7 [Holden Karau] Switch the test code to behave slightly more like production code. also don't check the param map value only check for key existence 140d25d [Holden Karau] remove extra space 926165a [Holden Karau] Add some missing newlines for pep8 style 8616751 [Holden Karau] merge in master 58532e6 [Holden Karau] its the __name__ method, also treat None values as not set 56ef24a [Holden Karau] fix test and regenerate base afdaa5c [Holden Karau] make sure different classes have different results 68eb528 [Holden Karau] switch default seed to hash of type of self 89c4611 [Holden Karau] Merge branch 'master' into SPARK-7511-pyspark-ml-seed-param-should-be-random-by-default-or-42-is-quite-funny-but-not-very-random 31cd96f [Holden Karau] specify the seed to randomforestregressor test e1b947f [Holden Karau] Style fixes ce90ec8 [Holden Karau] merge in master bcdf3c9 [Holden Karau] update docstring seeds to none and some other default seeds from 42 65eba21 [Holden Karau] pep8 fixes 0e3797e [Holden Karau] Make seed default to random in more places 213a543 [Holden Karau] Simplify the generated code to only include set default if there is a default rather than having None is note None in the generated code 1ff17c2 [Holden Karau] Make the seed random for HasSeed in python --- python/pyspark/ml/classification.py | 12 ++-- python/pyspark/ml/feature.py | 10 +-- python/pyspark/ml/param/__init__.py | 2 +- .../ml/param/_shared_params_code_gen.py | 9 +-- python/pyspark/ml/param/shared.py | 37 ++-------- python/pyspark/ml/recommendation.py | 10 +-- python/pyspark/ml/regression.py | 13 ++-- python/pyspark/ml/tests.py | 67 +++++++++++++++++-- 8 files changed, 96 insertions(+), 64 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 4e645519c47c7..7abbde8b260eb 100644 --- a/python/pyspark/ml/classification.py +++ 
b/python/pyspark/ml/classification.py @@ -292,7 +292,7 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") >>> si_model = stringIndexer.fit(df) >>> td = si_model.transform(df) - >>> rf = RandomForestClassifier(numTrees=2, maxDepth=2, labelCol="indexed") + >>> rf = RandomForestClassifier(numTrees=2, maxDepth=2, labelCol="indexed", seed=42) >>> model = rf.fit(td) >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) >>> model.transform(test0).head().prediction @@ -319,12 +319,12 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", - numTrees=20, featureSubsetStrategy="auto", seed=42): + numTrees=20, featureSubsetStrategy="auto", seed=None): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \ - numTrees=20, featureSubsetStrategy="auto", seed=42) + numTrees=20, featureSubsetStrategy="auto", seed=None) """ super(RandomForestClassifier, self).__init__() self._java_obj = self._new_java_obj( @@ -347,7 +347,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred "The number of features to consider for splits at each tree node. Supported " + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, impurity="gini", numTrees=20, featureSubsetStrategy="auto") kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -355,12 +355,12 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred @keyword_only def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, impurity="gini", numTrees=20, featureSubsetStrategy="auto"): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, \ impurity="gini", numTrees=20, featureSubsetStrategy="auto") Sets params for linear classification. 
""" diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index c8115cb5bcf63..5511dceb70419 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -876,10 +876,10 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has @keyword_only def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, - seed=42, inputCol=None, outputCol=None): + seed=None, inputCol=None, outputCol=None): """ __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \ - seed=42, inputCol=None, outputCol=None) + seed=None, inputCol=None, outputCol=None) """ super(Word2Vec, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid) @@ -891,15 +891,15 @@ def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, "the minimum number of times a token must appear to be included " + "in the word2vec model's vocabulary") self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, - seed=42) + seed=None) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @keyword_only def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, - seed=42, inputCol=None, outputCol=None): + seed=None, inputCol=None, outputCol=None): """ - setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, \ + setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=None, \ inputCol=None, outputCol=None) Sets params for this Word2Vec. """ diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 67fb6e3dc74fb..7845536161e07 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -147,7 +147,7 @@ def hasParam(self, paramName): def getOrDefault(self, param): """ Gets the value of a param in the user-supplied param map or its - default value. Raises an error if either is set. + default value. Raises an error if neither is set. 
""" param = self._resolveParam(param) if param in self._paramMap: diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 91e45ec373518..ccb929af184b8 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -56,9 +56,10 @@ def _gen_param_header(name, doc, defaultValueStr): def __init__(self): super(Has$Name, self).__init__() #: param for $doc - self.$name = Param(self, "$name", "$doc") - if $defaultValueStr is not None: - self._setDefault($name=$defaultValueStr)''' + self.$name = Param(self, "$name", "$doc")''' + if defaultValueStr is not None: + template += ''' + self._setDefault($name=$defaultValueStr)''' Name = name[0].upper() + name[1:] return template \ @@ -118,7 +119,7 @@ def get$Name(self): ("outputCol", "output column name", None), ("numFeatures", "number of features", None), ("checkpointInterval", "checkpoint interval (>= 1)", None), - ("seed", "random seed", None), + ("seed", "random seed", "hash(type(self).__name__)"), ("tol", "the convergence tolerance for iterative algorithms", None), ("stepSize", "Step size to be used for each iteration of optimization.", None)] code = [] diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index a5dc9b7ef29ed..0b93788899124 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -32,8 +32,6 @@ def __init__(self): super(HasMaxIter, self).__init__() #: param for max number of iterations (>= 0) self.maxIter = Param(self, "maxIter", "max number of iterations (>= 0)") - if None is not None: - self._setDefault(maxIter=None) def setMaxIter(self, value): """ @@ -61,8 +59,6 @@ def __init__(self): super(HasRegParam, self).__init__() #: param for regularization parameter (>= 0) self.regParam = Param(self, "regParam", "regularization parameter (>= 0)") - if None is not None: - self._setDefault(regParam=None) def setRegParam(self, value): """ @@ -90,8 +86,7 @@ def __init__(self): super(HasFeaturesCol, self).__init__() #: param for features column name self.featuresCol = Param(self, "featuresCol", "features column name") - if 'features' is not None: - self._setDefault(featuresCol='features') + self._setDefault(featuresCol='features') def setFeaturesCol(self, value): """ @@ -119,8 +114,7 @@ def __init__(self): super(HasLabelCol, self).__init__() #: param for label column name self.labelCol = Param(self, "labelCol", "label column name") - if 'label' is not None: - self._setDefault(labelCol='label') + self._setDefault(labelCol='label') def setLabelCol(self, value): """ @@ -148,8 +142,7 @@ def __init__(self): super(HasPredictionCol, self).__init__() #: param for prediction column name self.predictionCol = Param(self, "predictionCol", "prediction column name") - if 'prediction' is not None: - self._setDefault(predictionCol='prediction') + self._setDefault(predictionCol='prediction') def setPredictionCol(self, value): """ @@ -177,8 +170,7 @@ def __init__(self): super(HasProbabilityCol, self).__init__() #: param for Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities. self.probabilityCol = Param(self, "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! 
These probabilities should be treated as confidences, not precise probabilities.") - if 'probability' is not None: - self._setDefault(probabilityCol='probability') + self._setDefault(probabilityCol='probability') def setProbabilityCol(self, value): """ @@ -206,8 +198,7 @@ def __init__(self): super(HasRawPredictionCol, self).__init__() #: param for raw prediction (a.k.a. confidence) column name self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name") - if 'rawPrediction' is not None: - self._setDefault(rawPredictionCol='rawPrediction') + self._setDefault(rawPredictionCol='rawPrediction') def setRawPredictionCol(self, value): """ @@ -235,8 +226,6 @@ def __init__(self): super(HasInputCol, self).__init__() #: param for input column name self.inputCol = Param(self, "inputCol", "input column name") - if None is not None: - self._setDefault(inputCol=None) def setInputCol(self, value): """ @@ -264,8 +253,6 @@ def __init__(self): super(HasInputCols, self).__init__() #: param for input column names self.inputCols = Param(self, "inputCols", "input column names") - if None is not None: - self._setDefault(inputCols=None) def setInputCols(self, value): """ @@ -293,8 +280,6 @@ def __init__(self): super(HasOutputCol, self).__init__() #: param for output column name self.outputCol = Param(self, "outputCol", "output column name") - if None is not None: - self._setDefault(outputCol=None) def setOutputCol(self, value): """ @@ -322,8 +307,6 @@ def __init__(self): super(HasNumFeatures, self).__init__() #: param for number of features self.numFeatures = Param(self, "numFeatures", "number of features") - if None is not None: - self._setDefault(numFeatures=None) def setNumFeatures(self, value): """ @@ -351,8 +334,6 @@ def __init__(self): super(HasCheckpointInterval, self).__init__() #: param for checkpoint interval (>= 1) self.checkpointInterval = Param(self, "checkpointInterval", "checkpoint interval (>= 1)") - if None is not None: - self._setDefault(checkpointInterval=None) def setCheckpointInterval(self, value): """ @@ -380,8 +361,7 @@ def __init__(self): super(HasSeed, self).__init__() #: param for random seed self.seed = Param(self, "seed", "random seed") - if None is not None: - self._setDefault(seed=None) + self._setDefault(seed=hash(type(self).__name__)) def setSeed(self, value): """ @@ -409,8 +389,6 @@ def __init__(self): super(HasTol, self).__init__() #: param for the convergence tolerance for iterative algorithms self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms") - if None is not None: - self._setDefault(tol=None) def setTol(self, value): """ @@ -438,8 +416,6 @@ def __init__(self): super(HasStepSize, self).__init__() #: param for Step size to be used for each iteration of optimization. self.stepSize = Param(self, "stepSize", "Step size to be used for each iteration of optimization.") - if None is not None: - self._setDefault(stepSize=None) def setStepSize(self, value): """ @@ -467,6 +443,7 @@ class DecisionTreeParams(Params): minInfoGain = Param(Params._dummy(), "minInfoGain", "Minimum information gain for a split to be considered at a tree node.") maxMemoryInMB = Param(Params._dummy(), "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation.") cacheNodeIds = Param(Params._dummy(), "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. 
Caching can speed up training of deeper trees.") + def __init__(self): super(DecisionTreeParams, self).__init__() diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index 39c2527543774..b3e0dd7abf681 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -89,11 +89,11 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha @keyword_only def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, - implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, ratingCol="rating", nonnegative=False, checkpointInterval=10): """ __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \ - implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0, \ + implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=None, \ ratingCol="rating", nonnegative=false, checkpointInterval=10) """ super(ALS, self).__init__() @@ -109,18 +109,18 @@ def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemB self.nonnegative = Param(self, "nonnegative", "whether to use nonnegative constraint for least squares") self._setDefault(rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, - implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, ratingCol="rating", nonnegative=False, checkpointInterval=10) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @keyword_only def setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, - implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, ratingCol="rating", nonnegative=False, checkpointInterval=10): """ setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \ - implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, \ + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, \ ratingCol="rating", nonnegative=False, checkpointInterval=10) Sets params for ALS. """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index ff809cdafdf51..b139e27372d80 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -257,7 +257,7 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(1.0)), ... 
(0.0, Vectors.sparse(1, [], []))], ["label", "features"]) - >>> rf = RandomForestRegressor(numTrees=2, maxDepth=2) + >>> rf = RandomForestRegressor(numTrees=2, maxDepth=2, seed=42) >>> model = rf.fit(df) >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) >>> model.transform(test0).head().prediction @@ -284,12 +284,13 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance", - numTrees=20, featureSubsetStrategy="auto", seed=42): + numTrees=20, featureSubsetStrategy="auto", seed=None): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ - impurity="variance", numTrees=20, featureSubsetStrategy="auto", seed=42) + impurity="variance", numTrees=20, \ + featureSubsetStrategy="auto", seed=None) """ super(RandomForestRegressor, self).__init__() self._java_obj = self._new_java_obj( @@ -312,7 +313,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred "The number of features to consider for splits at each tree node. Supported " + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, impurity="variance", numTrees=20, featureSubsetStrategy="auto") kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -320,12 +321,12 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred @keyword_only def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, impurity="variance", numTrees=20, featureSubsetStrategy="auto"): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ - maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, \ impurity="variance", numTrees=20, featureSubsetStrategy="auto") Sets params for linear regression. 
""" diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 10fe0ef8db38f..6adbf166f34a8 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -33,7 +33,8 @@ from pyspark.tests import ReusedPySparkTestCase as PySparkTestCase from pyspark.sql import DataFrame, SQLContext from pyspark.ml.param import Param, Params -from pyspark.ml.param.shared import HasMaxIter, HasInputCol +from pyspark.ml.param.shared import HasMaxIter, HasInputCol, HasSeed +from pyspark.ml.util import keyword_only from pyspark.ml import Estimator, Model, Pipeline, Transformer from pyspark.ml.feature import * from pyspark.mllib.linalg import DenseVector @@ -111,14 +112,46 @@ def test_pipeline(self): self.assertEqual(6, dataset.index) -class TestParams(HasMaxIter, HasInputCol): +class TestParams(HasMaxIter, HasInputCol, HasSeed): """ - A subclass of Params mixed with HasMaxIter and HasInputCol. + A subclass of Params mixed with HasMaxIter, HasInputCol and HasSeed. """ - - def __init__(self): + @keyword_only + def __init__(self, seed=None): super(TestParams, self).__init__() self._setDefault(maxIter=10) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, seed=None): + """ + setParams(self, seed=None) + Sets params for this test. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + +class OtherTestParams(HasMaxIter, HasInputCol, HasSeed): + """ + A subclass of Params mixed with HasMaxIter, HasInputCol and HasSeed. + """ + @keyword_only + def __init__(self, seed=None): + super(OtherTestParams, self).__init__() + self._setDefault(maxIter=10) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, seed=None): + """ + setParams(self, seed=None) + Sets params for this test. 
+ """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) class ParamTests(PySparkTestCase): @@ -134,9 +167,10 @@ def test_params(self): testParams = TestParams() maxIter = testParams.maxIter inputCol = testParams.inputCol + seed = testParams.seed params = testParams.params - self.assertEqual(params, [inputCol, maxIter]) + self.assertEqual(params, [inputCol, maxIter, seed]) self.assertTrue(testParams.hasParam(maxIter)) self.assertTrue(testParams.hasDefault(maxIter)) @@ -154,10 +188,29 @@ def test_params(self): with self.assertRaises(KeyError): testParams.getInputCol() + # Since the default is normally random, set it to a known number for debug str + testParams._setDefault(seed=41) + testParams.setSeed(43) + self.assertEquals( testParams.explainParams(), "\n".join(["inputCol: input column name (undefined)", - "maxIter: max number of iterations (>= 0) (default: 10, current: 100)"])) + "maxIter: max number of iterations (>= 0) (default: 10, current: 100)", + "seed: random seed (default: 41, current: 43)"])) + + def test_hasseed(self): + noSeedSpecd = TestParams() + withSeedSpecd = TestParams(seed=42) + other = OtherTestParams() + # Check that we no longer use 42 as the magic number + self.assertNotEqual(noSeedSpecd.getSeed(), 42) + origSeed = noSeedSpecd.getSeed() + # Check that we only compute the seed once + self.assertEqual(noSeedSpecd.getSeed(), origSeed) + # Check that a specified seed is honored + self.assertEqual(withSeedSpecd.getSeed(), 42) + # Check that a different class has a different seed + self.assertNotEqual(other.getSeed(), noSeedSpecd.getSeed()) class FeatureTests(PySparkTestCase): From 9b84443dd43777e25b0b00468c61814fe6d26c23 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Wed, 20 May 2015 15:39:32 -0700 Subject: [PATCH 280/320] [SPARK-7237] [SPARK-7741] [CORE] [STREAMING] Clean more closures that need cleaning SPARK-7741 is the equivalent of SPARK-7237 in streaming. This is an alternative to #6268. Author: Andrew Or <andrew@databricks.com> Closes #6269 from andrewor14/clean-moar and squashes the following commits: c51c9ab [Andrew Or] Add periods (trivial) 6c686ac [Andrew Or] Merge branch 'master' of github.com:apache/spark into clean-moar 79a435b [Andrew Or] Fix tests d18c9f9 [Andrew Or] Merge branch 'master' of github.com:apache/spark into clean-moar 65ef07b [Andrew Or] Fix tests? 
4b487a3 [Andrew Or] Add tests for closures passed to DStream operations 328139b [Andrew Or] Do not forget foreachRDD 5431f61 [Andrew Or] Clean streaming closures 72b7b73 [Andrew Or] Clean core closures --- .../scala/org/apache/spark/SparkContext.scala | 4 +- .../apache/spark/rdd/PairRDDFunctions.scala | 5 +- .../spark/util/ClosureCleanerSuite.scala | 4 + .../spark/streaming/StreamingContext.scala | 2 +- .../spark/streaming/dstream/DStream.scala | 5 +- .../dstream/PairDStreamFunctions.scala | 30 ++- .../spark/streaming/DStreamClosureSuite.scala | 196 ++++++++++++++++++ .../spark/streaming/DStreamScopeSuite.scala | 22 +- .../spark/streaming/TestSuiteBase.scala | 18 ++ 9 files changed, 249 insertions(+), 37 deletions(-) create mode 100644 streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 3fe3dc5e300e8..cf3820fcb6a35 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1159,8 +1159,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli kcf: () => WritableConverter[K], vcf: () => WritableConverter[V]): RDD[(K, V)] = { withScope { assertNotStopped() - val kc = kcf() - val vc = vcf() + val kc = clean(kcf)() + val vc = clean(vcf)() val format = classOf[SequenceFileInputFormat[Writable, Writable]] val writables = hadoopFile(path, format, kc.writableClass(km).asInstanceOf[Class[Writable]], diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index a6d5d2c94e17f..8653cdee1adee 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -296,6 +296,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * before sending results to a reducer, similarly to a "combiner" in MapReduce. 
*/ def reduceByKeyLocally(func: (V, V) => V): Map[K, V] = self.withScope { + val cleanedF = self.sparkContext.clean(func) if (keyClass.isArray) { throw new SparkException("reduceByKeyLocally() does not support array keys") @@ -305,7 +306,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) val map = new JHashMap[K, V] iter.foreach { pair => val old = map.get(pair._1) - map.put(pair._1, if (old == null) pair._2 else func(old, pair._2)) + map.put(pair._1, if (old == null) pair._2 else cleanedF(old, pair._2)) } Iterator(map) } : Iterator[JHashMap[K, V]] @@ -313,7 +314,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) val mergeMaps = (m1: JHashMap[K, V], m2: JHashMap[K, V]) => { m2.foreach { pair => val old = m1.get(pair._1) - m1.put(pair._1, if (old == null) pair._2 else func(old, pair._2)) + m1.put(pair._1, if (old == null) pair._2 else cleanedF(old, pair._2)) } m1 } : JHashMap[K, V] diff --git a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala index e41f6ee27764e..7b165fe28bdd3 100644 --- a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala @@ -112,6 +112,7 @@ class ClosureCleanerSuite extends FunSuite { expectCorrectException { TestUserClosuresActuallyCleaned.testAggregateByKey(pairRdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testFoldByKey(pairRdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testReduceByKey(pairRdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testReduceByKeyLocally(pairRdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testMapValues(pairRdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testFlatMapValues(pairRdd) } expectCorrectException { TestUserClosuresActuallyCleaned.testForeachAsync(rdd) } @@ -315,6 +316,9 @@ private object TestUserClosuresActuallyCleaned { } def testFoldByKey(rdd: RDD[(Int, Int)]): Unit = { rdd.foldByKey(0) { case (_, _) => return; 1 } } def testReduceByKey(rdd: RDD[(Int, Int)]): Unit = { rdd.reduceByKey { case (_, _) => return; 1 } } + def testReduceByKeyLocally(rdd: RDD[(Int, Int)]): Unit = { + rdd.reduceByKeyLocally { case (_, _) => return; 1 } + } def testMapValues(rdd: RDD[(Int, Int)]): Unit = { rdd.mapValues { _ => return; 1 } } def testFlatMapValues(rdd: RDD[(Int, Int)]): Unit = { rdd.flatMapValues { _ => return; Seq() } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 7f181bcecd4bf..fe614c4be590f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -255,7 +255,7 @@ class StreamingContext private[streaming] ( * * Note: Return statements are NOT allowed in the given body. 
*/ - private[streaming] def withNamedScope[U](name: String)(body: => U): U = { + private def withNamedScope[U](name: String)(body: => U): U = { RDDOperationScope.withScope(sc, name, allowNesting = false, ignoreParent = false)(body) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index 5977481e1f081..7c50a766a9bad 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -539,7 +539,7 @@ abstract class DStream[T: ClassTag] ( /** Return a new DStream containing only the elements that satisfy a predicate. */ def filter(filterFunc: T => Boolean): DStream[T] = ssc.withScope { - new FilteredDStream(this, filterFunc) + new FilteredDStream(this, context.sparkContext.clean(filterFunc)) } /** @@ -624,7 +624,8 @@ abstract class DStream[T: ClassTag] ( * 'this' DStream will be registered as an output stream and therefore materialized. */ def foreachRDD(foreachFunc: RDD[T] => Unit): Unit = ssc.withScope { - this.foreachRDD((r: RDD[T], t: Time) => foreachFunc(r)) + val cleanedF = context.sparkContext.clean(foreachFunc, false) + this.foreachRDD((r: RDD[T], t: Time) => cleanedF(r)) } /** diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala index 884a8e8b52289..fda22eb6ec42e 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala @@ -38,6 +38,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) { private[streaming] def ssc = self.ssc + private[streaming] def sparkContext = self.context.sparkContext + private[streaming] def defaultPartitioner(numPartitions: Int = self.ssc.sc.defaultParallelism) = { new HashPartitioner(numPartitions) } @@ -98,8 +100,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def reduceByKey( reduceFunc: (V, V) => V, partitioner: Partitioner): DStream[(K, V)] = ssc.withScope { - val cleanedReduceFunc = ssc.sc.clean(reduceFunc) - combineByKey((v: V) => v, cleanedReduceFunc, cleanedReduceFunc, partitioner) + combineByKey((v: V) => v, reduceFunc, reduceFunc, partitioner) } /** @@ -113,7 +114,15 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) mergeCombiner: (C, C) => C, partitioner: Partitioner, mapSideCombine: Boolean = true): DStream[(K, C)] = ssc.withScope { - new ShuffledDStream[K, V, C](self, createCombiner, mergeValue, mergeCombiner, partitioner, + val cleanedCreateCombiner = sparkContext.clean(createCombiner) + val cleanedMergeValue = sparkContext.clean(mergeValue) + val cleanedMergeCombiner = sparkContext.clean(mergeCombiner) + new ShuffledDStream[K, V, C]( + self, + cleanedCreateCombiner, + cleanedMergeValue, + cleanedMergeCombiner, + partitioner, mapSideCombine) } @@ -264,10 +273,9 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) slideDuration: Duration, partitioner: Partitioner ): DStream[(K, V)] = ssc.withScope { - val cleanedReduceFunc = ssc.sc.clean(reduceFunc) - self.reduceByKey(cleanedReduceFunc, partitioner) + self.reduceByKey(reduceFunc, partitioner) .window(windowDuration, slideDuration) - .reduceByKey(cleanedReduceFunc, partitioner) + .reduceByKey(reduceFunc, partitioner) } /** @@ -385,8 +393,9 @@ class PairDStreamFunctions[K, V](self: 
DStream[(K,V)]) updateFunc: (Seq[V], Option[S]) => Option[S], partitioner: Partitioner ): DStream[(K, S)] = ssc.withScope { + val cleanedUpdateF = sparkContext.clean(updateFunc) val newUpdateFunc = (iterator: Iterator[(K, Seq[V], Option[S])]) => { - iterator.flatMap(t => updateFunc(t._2, t._3).map(s => (t._1, s))) + iterator.flatMap(t => cleanedUpdateF(t._2, t._3).map(s => (t._1, s))) } updateStateByKey(newUpdateFunc, partitioner, true) } @@ -428,8 +437,9 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) partitioner: Partitioner, initialRDD: RDD[(K, S)] ): DStream[(K, S)] = ssc.withScope { + val cleanedUpdateF = sparkContext.clean(updateFunc) val newUpdateFunc = (iterator: Iterator[(K, Seq[V], Option[S])]) => { - iterator.flatMap(t => updateFunc(t._2, t._3).map(s => (t._1, s))) + iterator.flatMap(t => cleanedUpdateF(t._2, t._3).map(s => (t._1, s))) } updateStateByKey(newUpdateFunc, partitioner, true, initialRDD) } @@ -463,7 +473,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * 'this' DStream without changing the key. */ def mapValues[U: ClassTag](mapValuesFunc: V => U): DStream[(K, U)] = ssc.withScope { - new MapValuedDStream[K, V, U](self, mapValuesFunc) + new MapValuedDStream[K, V, U](self, sparkContext.clean(mapValuesFunc)) } /** @@ -473,7 +483,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def flatMapValues[U: ClassTag]( flatMapValuesFunc: V => TraversableOnce[U] ): DStream[(K, U)] = ssc.withScope { - new FlatMapValuedDStream[K, V, U](self, flatMapValuesFunc) + new FlatMapValuedDStream[K, V, U](self, sparkContext.clean(flatMapValuesFunc)) } /** diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala new file mode 100644 index 0000000000000..6a1dd6949b204 --- /dev/null +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import java.io.NotSerializableException + +import org.scalatest.{BeforeAndAfterAll, FunSuite} + +import org.apache.spark.{HashPartitioner, SparkContext, SparkException} +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming.dstream.DStream +import org.apache.spark.util.ReturnStatementInClosureException + +/** + * Test that closures passed to DStream operations are actually cleaned. 
+ */ +class DStreamClosureSuite extends FunSuite with BeforeAndAfterAll { + private var ssc: StreamingContext = null + + override def beforeAll(): Unit = { + val sc = new SparkContext("local", "test") + ssc = new StreamingContext(sc, Seconds(1)) + } + + override def afterAll(): Unit = { + ssc.stop(stopSparkContext = true) + ssc = null + } + + test("user provided closures are actually cleaned") { + val dstream = new DummyInputDStream(ssc) + val pairDstream = dstream.map { i => (i, i) } + // DStream + testMap(dstream) + testFlatMap(dstream) + testFilter(dstream) + testMapPartitions(dstream) + testReduce(dstream) + testForeach(dstream) + testForeachRDD(dstream) + testTransform(dstream) + testTransformWith(dstream) + testReduceByWindow(dstream) + // PairDStreamFunctions + testReduceByKey(pairDstream) + testCombineByKey(pairDstream) + testReduceByKeyAndWindow(pairDstream) + testUpdateStateByKey(pairDstream) + testMapValues(pairDstream) + testFlatMapValues(pairDstream) + // StreamingContext + testTransform2(ssc, dstream) + } + + /** + * Verify that the expected exception is thrown. + * + * We use return statements as an indication that a closure is actually being cleaned. + * We expect closure cleaner to find the return statements in the user provided closures. + */ + private def expectCorrectException(body: => Unit): Unit = { + try { + body + } catch { + case rse: ReturnStatementInClosureException => // Success! + case e @ (_: NotSerializableException | _: SparkException) => + throw new TestException( + s"Expected ReturnStatementInClosureException, but got $e.\n" + + "This means the closure provided by user is not actually cleaned.") + } + } + + // DStream operations + private def testMap(ds: DStream[Int]): Unit = expectCorrectException { + ds.map { _ => return; 1 } + } + private def testFlatMap(ds: DStream[Int]): Unit = expectCorrectException { + ds.flatMap { _ => return; Seq.empty } + } + private def testFilter(ds: DStream[Int]): Unit = expectCorrectException { + ds.filter { _ => return; true } + } + private def testMapPartitions(ds: DStream[Int]): Unit = expectCorrectException { + ds.mapPartitions { _ => return; Seq.empty.toIterator } + } + private def testReduce(ds: DStream[Int]): Unit = expectCorrectException { + ds.reduce { case (_, _) => return; 1 } + } + private def testForeach(ds: DStream[Int]): Unit = { + val foreachF1 = (rdd: RDD[Int], t: Time) => return + val foreachF2 = (rdd: RDD[Int]) => return + expectCorrectException { ds.foreach(foreachF1) } + expectCorrectException { ds.foreach(foreachF2) } + } + private def testForeachRDD(ds: DStream[Int]): Unit = { + val foreachRDDF1 = (rdd: RDD[Int], t: Time) => return + val foreachRDDF2 = (rdd: RDD[Int]) => return + expectCorrectException { ds.foreachRDD(foreachRDDF1) } + expectCorrectException { ds.foreachRDD(foreachRDDF2) } + } + private def testTransform(ds: DStream[Int]): Unit = { + val transformF1 = (rdd: RDD[Int]) => { return; rdd } + val transformF2 = (rdd: RDD[Int], time: Time) => { return; rdd } + expectCorrectException { ds.transform(transformF1) } + expectCorrectException { ds.transform(transformF2) } + } + private def testTransformWith(ds: DStream[Int]): Unit = { + val transformF1 = (rdd1: RDD[Int], rdd2: RDD[Int]) => { return; rdd1 } + val transformF2 = (rdd1: RDD[Int], rdd2: RDD[Int], time: Time) => { return; rdd2 } + expectCorrectException { ds.transformWith(ds, transformF1) } + expectCorrectException { ds.transformWith(ds, transformF2) } + } + private def testReduceByWindow(ds: DStream[Int]): Unit = { + val reduceF = (_: 
Int, _: Int) => { return; 1 } + expectCorrectException { ds.reduceByWindow(reduceF, Seconds(1), Seconds(2)) } + expectCorrectException { ds.reduceByWindow(reduceF, reduceF, Seconds(1), Seconds(2)) } + } + + // PairDStreamFunctions operations + private def testReduceByKey(ds: DStream[(Int, Int)]): Unit = { + val reduceF = (_: Int, _: Int) => { return; 1 } + expectCorrectException { ds.reduceByKey(reduceF) } + expectCorrectException { ds.reduceByKey(reduceF, 5) } + expectCorrectException { ds.reduceByKey(reduceF, new HashPartitioner(5)) } + } + private def testCombineByKey(ds: DStream[(Int, Int)]): Unit = { + expectCorrectException { + ds.combineByKey[Int]( + { _: Int => return; 1 }, + { case (_: Int, _: Int) => return; 1 }, + { case (_: Int, _: Int) => return; 1 }, + new HashPartitioner(5) + ) + } + } + private def testReduceByKeyAndWindow(ds: DStream[(Int, Int)]): Unit = { + val reduceF = (_: Int, _: Int) => { return; 1 } + val filterF = (_: (Int, Int)) => { return; false } + expectCorrectException { ds.reduceByKeyAndWindow(reduceF, Seconds(1)) } + expectCorrectException { ds.reduceByKeyAndWindow(reduceF, Seconds(1), Seconds(2)) } + expectCorrectException { ds.reduceByKeyAndWindow(reduceF, Seconds(1), Seconds(2), 5) } + expectCorrectException { + ds.reduceByKeyAndWindow(reduceF, Seconds(1), Seconds(2), new HashPartitioner(5)) + } + expectCorrectException { ds.reduceByKeyAndWindow(reduceF, reduceF, Seconds(2)) } + expectCorrectException { + ds.reduceByKeyAndWindow( + reduceF, reduceF, Seconds(2), Seconds(3), new HashPartitioner(5), filterF) + } + } + private def testUpdateStateByKey(ds: DStream[(Int, Int)]): Unit = { + val updateF1 = (_: Seq[Int], _: Option[Int]) => { return; Some(1) } + val updateF2 = (_: Iterator[(Int, Seq[Int], Option[Int])]) => { return; Seq((1, 1)).toIterator } + val initialRDD = ds.ssc.sparkContext.emptyRDD[Int].map { i => (i, i) } + expectCorrectException { ds.updateStateByKey(updateF1) } + expectCorrectException { ds.updateStateByKey(updateF1, 5) } + expectCorrectException { ds.updateStateByKey(updateF1, new HashPartitioner(5)) } + expectCorrectException { + ds.updateStateByKey(updateF1, new HashPartitioner(5), initialRDD) + } + expectCorrectException { + ds.updateStateByKey(updateF2, new HashPartitioner(5), true) + } + expectCorrectException { + ds.updateStateByKey(updateF2, new HashPartitioner(5), true, initialRDD) + } + } + private def testMapValues(ds: DStream[(Int, Int)]): Unit = expectCorrectException { + ds.mapValues { _ => return; 1 } + } + private def testFlatMapValues(ds: DStream[(Int, Int)]): Unit = expectCorrectException { + ds.flatMapValues { _ => return; Seq.empty } + } + + // StreamingContext operations + private def testTransform2(ssc: StreamingContext, ds: DStream[Int]): Unit = { + val transformF = (rdds: Seq[RDD[_]], time: Time) => { return; ssc.sparkContext.emptyRDD[Int] } + expectCorrectException { ssc.transform(Seq(ds), transformF) } + } + +} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala index 392933102097e..e3fb2ef130859 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala @@ -20,8 +20,8 @@ package org.apache.spark.streaming import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} import org.apache.spark.SparkContext -import org.apache.spark.rdd.{RDD, RDDOperationScope} -import 
org.apache.spark.streaming.dstream.{DStream, InputDStream} +import org.apache.spark.rdd.RDDOperationScope +import org.apache.spark.streaming.dstream.DStream import org.apache.spark.streaming.ui.UIUtils /** @@ -170,21 +170,3 @@ class DStreamScopeSuite extends FunSuite with BeforeAndAfter with BeforeAndAfter } } - -/** - * A dummy stream that does absolutely nothing. - */ -private class DummyDStream(ssc: StreamingContext) extends DStream[Int](ssc) { - override def dependencies: List[DStream[Int]] = List.empty - override def slideDuration: Duration = Seconds(1) - override def compute(time: Time): Option[RDD[Int]] = Some(ssc.sc.emptyRDD[Int]) -} - -/** - * A dummy input stream that does absolutely nothing. - */ -private class DummyInputDStream(ssc: StreamingContext) extends InputDStream[Int](ssc) { - override def start(): Unit = { } - override def stop(): Unit = { } - override def compute(time: Time): Option[RDD[Int]] = Some(ssc.sc.emptyRDD[Int]) -} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala index 4f70ae7f1f187..554cd30223f44 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -35,6 +35,24 @@ import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream import org.apache.spark.streaming.scheduler._ import org.apache.spark.util.{ManualClock, Utils} +/** + * A dummy stream that does absolutely nothing. + */ +private[streaming] class DummyDStream(ssc: StreamingContext) extends DStream[Int](ssc) { + override def dependencies: List[DStream[Int]] = List.empty + override def slideDuration: Duration = Seconds(1) + override def compute(time: Time): Option[RDD[Int]] = Some(ssc.sc.emptyRDD[Int]) +} + +/** + * A dummy input stream that does absolutely nothing. + */ +private[streaming] class DummyInputDStream(ssc: StreamingContext) extends InputDStream[Int](ssc) { + override def start(): Unit = { } + override def stop(): Unit = { } + override def compute(time: Time): Option[RDD[Int]] = Some(ssc.sc.emptyRDD[Int]) +} + /** * This is a input stream just for the testsuites. This is equivalent to a checkpointable, * replayable, reliable message queue like Kafka. It requires a sequence as input, and From 3c434cbfd0d6821e5bcf572be792b787a514018b Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Wed, 20 May 2015 16:21:23 -0700 Subject: [PATCH 281/320] [SPARK-7767] [STREAMING] Added test for checkpoint serialization in StreamingContext.start() Currently, the background checkpointing thread fails silently if the checkpoint is not serializable. It is hard to debug and therefore its best to fail fast at `start()` when checkpointing is enabled and the checkpoint is not serializable. Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6292 from tdas/SPARK-7767 and squashes the following commits: 51304e6 [Tathagata Das] Addressed comments. 
c35237b [Tathagata Das] Added test for checkpoint serialization in StreamingContext.start() --- .../serializer/SerializationDebugger.scala | 2 +- .../apache/spark/streaming/Checkpoint.scala | 70 +++++++++++-------- .../spark/streaming/StreamingContext.scala | 26 ++++++- .../streaming/StreamingContextSuite.scala | 27 +++++-- 4 files changed, 89 insertions(+), 36 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala index 5abfa467c0ec8..bb5db545531d2 100644 --- a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala +++ b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.spark.Logging -private[serializer] object SerializationDebugger extends Logging { +private[spark] object SerializationDebugger extends Logging { /** * Improve the given NotSerializableException with the serialization path leading from the given diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index 7bfae253c3a0c..d8dc4e4101664 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -102,6 +102,44 @@ object Checkpoint extends Logging { Seq.empty } } + + /** Serialize the checkpoint, or throw any exception that occurs */ + def serialize(checkpoint: Checkpoint, conf: SparkConf): Array[Byte] = { + val compressionCodec = CompressionCodec.createCodec(conf) + val bos = new ByteArrayOutputStream() + val zos = compressionCodec.compressedOutputStream(bos) + val oos = new ObjectOutputStream(zos) + Utils.tryWithSafeFinally { + oos.writeObject(checkpoint) + } { + oos.close() + } + bos.toByteArray + } + + /** Deserialize a checkpoint from the input stream, or throw any exception that occurs */ + def deserialize(inputStream: InputStream, conf: SparkConf): Checkpoint = { + val compressionCodec = CompressionCodec.createCodec(conf) + var ois: ObjectInputStreamWithLoader = null + Utils.tryWithSafeFinally { + + // ObjectInputStream uses the last defined user-defined class loader in the stack + // to find classes, which maybe the wrong class loader. Hence, a inherited version + // of ObjectInputStream is used to explicitly use the current thread's default class + // loader to find and load classes. 
This is a well know Java issue and has popped up + // in other places (e.g., http://jira.codehaus.org/browse/GROOVY-1627) + val zis = compressionCodec.compressedInputStream(inputStream) + ois = new ObjectInputStreamWithLoader(zis, + Thread.currentThread().getContextClassLoader) + val cp = ois.readObject.asInstanceOf[Checkpoint] + cp.validate() + cp + } { + if (ois != null) { + ois.close() + } + } + } } @@ -189,17 +227,10 @@ class CheckpointWriter( } def write(checkpoint: Checkpoint, clearCheckpointDataLater: Boolean) { - val bos = new ByteArrayOutputStream() - val zos = compressionCodec.compressedOutputStream(bos) - val oos = new ObjectOutputStream(zos) - Utils.tryWithSafeFinally { - oos.writeObject(checkpoint) - } { - oos.close() - } try { + val bytes = Checkpoint.serialize(checkpoint, conf) executor.execute(new CheckpointWriteHandler( - checkpoint.checkpointTime, bos.toByteArray, clearCheckpointDataLater)) + checkpoint.checkpointTime, bytes, clearCheckpointDataLater)) logDebug("Submitted checkpoint of time " + checkpoint.checkpointTime + " writer queue") } catch { case rej: RejectedExecutionException => @@ -264,25 +295,8 @@ object CheckpointReader extends Logging { checkpointFiles.foreach(file => { logInfo("Attempting to load checkpoint from file " + file) try { - var ois: ObjectInputStreamWithLoader = null - var cp: Checkpoint = null - Utils.tryWithSafeFinally { - val fis = fs.open(file) - // ObjectInputStream uses the last defined user-defined class loader in the stack - // to find classes, which maybe the wrong class loader. Hence, a inherited version - // of ObjectInputStream is used to explicitly use the current thread's default class - // loader to find and load classes. This is a well know Java issue and has popped up - // in other places (e.g., http://jira.codehaus.org/browse/GROOVY-1627) - val zis = compressionCodec.compressedInputStream(fis) - ois = new ObjectInputStreamWithLoader(zis, - Thread.currentThread().getContextClassLoader) - cp = ois.readObject.asInstanceOf[Checkpoint] - } { - if (ois != null) { - ois.close() - } - } - cp.validate() + val fis = fs.open(file) + val cp = Checkpoint.deserialize(fis, conf) logInfo("Checkpoint successfully loaded from file " + file) logInfo("Checkpoint was generated at time " + cp.checkpointTime) return Some(cp) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index fe614c4be590f..95063692e1146 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -17,7 +17,7 @@ package org.apache.spark.streaming -import java.io.InputStream +import java.io.{InputStream, NotSerializableException} import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} import scala.collection.Map @@ -35,6 +35,7 @@ import org.apache.spark._ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.input.FixedLengthBinaryInputFormat import org.apache.spark.rdd.{RDD, RDDOperationScope} +import org.apache.spark.serializer.SerializationDebugger import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContextState._ import org.apache.spark.streaming.dstream._ @@ -235,6 +236,10 @@ class StreamingContext private[streaming] ( } } + private[streaming] def isCheckpointingEnabled: Boolean = { + checkpointDir != null + } + private[streaming] def initialCheckpoint: Checkpoint = { if 
(isCheckpointPresent) cp_ else null } @@ -523,11 +528,26 @@ class StreamingContext private[streaming] ( assert(graph != null, "Graph is null") graph.validate() - assert( - checkpointDir == null || checkpointDuration != null, + require( + !isCheckpointingEnabled || checkpointDuration != null, "Checkpoint directory has been set, but the graph checkpointing interval has " + "not been set. Please use StreamingContext.checkpoint() to set the interval." ) + + // Verify whether the DStream checkpoint is serializable + if (isCheckpointingEnabled) { + val checkpoint = new Checkpoint(this, Time.apply(0)) + try { + Checkpoint.serialize(checkpoint, conf) + } catch { + case e: NotSerializableException => + throw new NotSerializableException( + "DStream checkpointing has been enabled but the DStreams with their functions " + + "are not serializable\nSerialization stack:\n" + + SerializationDebugger.find(checkpoint).map("\t- " + _).mkString("\n") + ) + } + } } /** diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 4b12affbb0ddd..3a958bf3a3c19 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -17,21 +17,21 @@ package org.apache.spark.streaming -import java.io.File +import java.io.{File, NotSerializableException} import java.util.concurrent.atomic.AtomicInteger import org.apache.commons.io.FileUtils -import org.scalatest.{Assertions, BeforeAndAfter, FunSuite} -import org.scalatest.concurrent.Timeouts import org.scalatest.concurrent.Eventually._ +import org.scalatest.concurrent.Timeouts import org.scalatest.exceptions.TestFailedDueToTimeoutException import org.scalatest.time.SpanSugar._ +import org.scalatest.{Assertions, BeforeAndAfter, FunSuite} -import org.apache.spark.{Logging, SparkConf, SparkContext, SparkException} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.dstream.DStream import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.util.Utils +import org.apache.spark.{Logging, SparkConf, SparkContext, SparkException} class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts with Logging { @@ -132,6 +132,25 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w } } + test("start with non-seriazable DStream checkpoints") { + val checkpointDir = Utils.createTempDir() + ssc = new StreamingContext(conf, batchDuration) + ssc.checkpoint(checkpointDir.getAbsolutePath) + addInputStream(ssc).foreachRDD { rdd => + // Refer to this.appName from inside closure so that this closure refers to + // the instance of StreamingContextSuite, and is therefore not serializable + rdd.count() + appName + } + + // Test whether start() fails early when checkpointing is enabled + val exception = intercept[NotSerializableException] { + ssc.start() + } + assert(exception.getMessage().contains("DStreams with their functions are not serializable")) + assert(ssc.getState() !== StreamingContextState.ACTIVE) + assert(StreamingContext.getActive().isEmpty) + } + test("start multiple times") { ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() From 7956dd7ab03e1542d89dd94c043f1e5131684199 Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Wed, 20 May 2015 16:37:11 -0700 Subject: [PATCH 282/320] [SPARK-7698] Cache and 
reuse buffers in ExecutorMemoryAllocator when using heap allocation When on-heap memory allocation is used, ExecutorMemoryManager should maintain a cache / pool of buffers for re-use by tasks. This will significantly improve the performance of the new Tungsten's sort-shuffle for jobs with many short-lived tasks by eliminating a major source of GC. This pull request is a minimum-viable-implementation of this idea. In its current form, this patch significantly improves performance on a stress test which launches huge numbers of short-lived shuffle map tasks back-to-back in the same JVM. Author: Josh Rosen <joshrosen@databricks.com> Closes #6227 from JoshRosen/SPARK-7698 and squashes the following commits: fd6cb55 [Josh Rosen] SoftReference -> WeakReference b154e86 [Josh Rosen] WIP sketch of pooling in ExecutorMemoryManager --- .../unsafe/memory/ExecutorMemoryManager.java | 57 ++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/ExecutorMemoryManager.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/ExecutorMemoryManager.java index 62c29c8cc1e4d..cbbe8594627a5 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/ExecutorMemoryManager.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/memory/ExecutorMemoryManager.java @@ -17,6 +17,12 @@ package org.apache.spark.unsafe.memory; +import java.lang.ref.WeakReference; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Map; +import javax.annotation.concurrent.GuardedBy; + /** * Manages memory for an executor. Individual operators / tasks allocate memory through * {@link TaskMemoryManager} objects, which obtain their memory from ExecutorMemoryManager. @@ -33,6 +39,12 @@ public class ExecutorMemoryManager { */ final boolean inHeap; + @GuardedBy("this") + private final Map<Long, LinkedList<WeakReference<MemoryBlock>>> bufferPoolsBySize = + new HashMap<Long, LinkedList<WeakReference<MemoryBlock>>>(); + + private static final int POOLING_THRESHOLD_BYTES = 1024 * 1024; + /** * Construct a new ExecutorMemoryManager. * @@ -43,16 +55,57 @@ public ExecutorMemoryManager(MemoryAllocator allocator) { this.allocator = allocator; } + /** + * Returns true if allocations of the given size should go through the pooling mechanism and + * false otherwise. + */ + private boolean shouldPool(long size) { + // Very small allocations are less likely to benefit from pooling. + // At some point, we should explore supporting pooling for off-heap memory, but for now we'll + // ignore that case in the interest of simplicity. + return size >= POOLING_THRESHOLD_BYTES && allocator instanceof HeapMemoryAllocator; + } + /** * Allocates a contiguous block of memory. Note that the allocated memory is not guaranteed * to be zeroed out (call `zero()` on the result if this is necessary). 
*/ MemoryBlock allocate(long size) throws OutOfMemoryError { - return allocator.allocate(size); + if (shouldPool(size)) { + synchronized (this) { + final LinkedList<WeakReference<MemoryBlock>> pool = bufferPoolsBySize.get(size); + if (pool != null) { + while (!pool.isEmpty()) { + final WeakReference<MemoryBlock> blockReference = pool.pop(); + final MemoryBlock memory = blockReference.get(); + if (memory != null) { + assert (memory.size() == size); + return memory; + } + } + bufferPoolsBySize.remove(size); + } + } + return allocator.allocate(size); + } else { + return allocator.allocate(size); + } } void free(MemoryBlock memory) { - allocator.free(memory); + final long size = memory.size(); + if (shouldPool(size)) { + synchronized (this) { + LinkedList<WeakReference<MemoryBlock>> pool = bufferPoolsBySize.get(size); + if (pool == null) { + pool = new LinkedList<WeakReference<MemoryBlock>>(); + bufferPoolsBySize.put(size, pool); + } + pool.add(new WeakReference<MemoryBlock>(memory)); + } + } else { + allocator.free(memory); + } } } From f2faa7af30662e3bdf15780f8719c71108f8e30b Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Wed, 20 May 2015 16:42:49 -0700 Subject: [PATCH 283/320] [SPARK-7251] Perform sequential scan when iterating over BytesToBytesMap This patch modifies `BytesToBytesMap.iterator()` to iterate through records in the order that they appear in the data pages rather than iterating through the hashtable pointer arrays. This results in fewer random memory accesses, significantly improving performance for scan-and-copy operations. This is possible because our data pages are laid out as sequences of `[keyLength][data][valueLength][data]` entries. In order to mark the end of a partially-filled data page, we write `-1` as a special end-of-page length (BytesToByesMap supports empty/zero-length keys and values, which is why we had to use a negative length). This patch incorporates / closes #5836. 
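To make the page layout concrete, the following is a minimal, self-contained Scala sketch of the sequential-scan idea described above. It is deliberately simplified: it uses 4-byte lengths in an on-heap ByteBuffer rather than the 8-byte, word-aligned entries in off-heap pages, and names such as SequentialScanSketch and EndOfPageMarker are illustrative only, not the actual BytesToBytesMap internals.

import java.nio.ByteBuffer

object SequentialScanSketch {
  // Illustrative end-of-page marker, mirroring the patch's "-1 length" sentinel.
  // A negative value is needed because zero-length keys and values are legal.
  val EndOfPageMarker: Int = -1

  // Append one [keyLength][key][valueLength][value] record to the page.
  def writeRecord(page: ByteBuffer, key: Array[Byte], value: Array[Byte]): Unit = {
    page.putInt(key.length)
    page.put(key)
    page.putInt(value.length)
    page.put(value)
  }

  // Walk the page front to back, stopping at the end-of-page marker.
  def scanPage(page: ByteBuffer): Seq[(Array[Byte], Array[Byte])] = {
    val records = Seq.newBuilder[(Array[Byte], Array[Byte])]
    var keyLen = page.getInt()
    while (keyLen != EndOfPageMarker) {
      val key = new Array[Byte](keyLen)
      page.get(key)
      val valueLen = page.getInt()
      val value = new Array[Byte](valueLen)
      page.get(value)
      records += ((key, value))
      keyLen = page.getInt()
    }
    records.result()
  }

  def main(args: Array[String]): Unit = {
    val page = ByteBuffer.allocate(1024)
    writeRecord(page, "k1".getBytes("UTF-8"), "v1".getBytes("UTF-8"))
    writeRecord(page, Array.emptyByteArray, "zero-length key".getBytes("UTF-8"))
    page.putInt(EndOfPageMarker) // mark the unused tail of the page
    page.flip()
    scanPage(page).foreach { case (k, v) =>
      println(s"key=${new String(k, "UTF-8")} value=${new String(v, "UTF-8")}")
    }
  }
}

The same front-to-back walk over [keyLength][key][valueLength][value] entries is what lets the iterator in this patch read records in page order instead of chasing random pointers through the hash table array.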
Author: Josh Rosen <joshrosen@databricks.com> Closes #6159 from JoshRosen/SPARK-7251 and squashes the following commits: 05bd90a [Josh Rosen] Compare capacity, not size, to MAX_CAPACITY 2a20d71 [Josh Rosen] Fix maximum BytesToBytesMap capacity bc4854b [Josh Rosen] Guard against overflow when growing BytesToBytesMap f5feadf [Josh Rosen] Add test for iterating over an empty map 273b842 [Josh Rosen] [SPARK-7251] Perform sequential scan when iterating over entries in BytesToBytesMap --- unsafe/pom.xml | 5 + .../spark/unsafe/map/BytesToBytesMap.java | 151 ++++++++++++---- .../unsafe/map/HashMapGrowthStrategy.java | 4 +- .../unsafe/memory/TaskMemoryManager.java | 2 +- .../map/AbstractBytesToBytesMapSuite.java | 165 ++++++++++++++++-- 5 files changed, 274 insertions(+), 53 deletions(-) diff --git a/unsafe/pom.xml b/unsafe/pom.xml index 9e151fc7a9141..2fd17267ac427 100644 --- a/unsafe/pom.xml +++ b/unsafe/pom.xml @@ -65,6 +65,11 @@ <artifactId>junit-interface</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-all</artifactId> + <scope>test</scope> + </dependency> </dependencies> <build> <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index 19d6a169fd2ad..bd4ca74cc7764 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -23,6 +23,8 @@ import java.util.LinkedList; import java.util.List; +import com.google.common.annotations.VisibleForTesting; + import org.apache.spark.unsafe.*; import org.apache.spark.unsafe.array.ByteArrayMethods; import org.apache.spark.unsafe.array.LongArray; @@ -36,9 +38,8 @@ * This is backed by a power-of-2-sized hash table, using quadratic probing with triangular numbers, * which is guaranteed to exhaust the space. * <p> - * The map can support up to 2^31 keys because we use 32 bit MurmurHash. If the key cardinality is - * higher than this, you should probably be using sorting instead of hashing for better cache - * locality. + * The map can support up to 2^29 keys. If the key cardinality is higher than this, you should + * probably be using sorting instead of hashing for better cache locality. * <p> * This class is not thread safe. */ @@ -48,6 +49,11 @@ public final class BytesToBytesMap { private static final HashMapGrowthStrategy growthStrategy = HashMapGrowthStrategy.DOUBLING; + /** + * Special record length that is placed after the last record in a data page. + */ + private static final int END_OF_PAGE_MARKER = -1; + private final TaskMemoryManager memoryManager; /** @@ -64,7 +70,7 @@ public final class BytesToBytesMap { /** * Offset into `currentDataPage` that points to the location where new data can be inserted into - * the page. + * the page. This does not incorporate the page's base offset. */ private long pageCursor = 0; @@ -74,6 +80,15 @@ public final class BytesToBytesMap { */ private static final long PAGE_SIZE_BYTES = 1L << 26; // 64 megabytes + /** + * The maximum number of keys that BytesToBytesMap supports. The hash table has to be + * power-of-2-sized and its backing Java array can contain at most (1 << 30) elements, since + * that's the largest power-of-2 that's less than Integer.MAX_VALUE. We need two long array + * entries per key, giving us a maximum capacity of (1 << 29). 
+ */ + @VisibleForTesting + static final int MAX_CAPACITY = (1 << 29); + // This choice of page table size and page size means that we can address up to 500 gigabytes // of memory. @@ -143,6 +158,13 @@ public BytesToBytesMap( this.loadFactor = loadFactor; this.loc = new Location(); this.enablePerfMetrics = enablePerfMetrics; + if (initialCapacity <= 0) { + throw new IllegalArgumentException("Initial capacity must be greater than 0"); + } + if (initialCapacity > MAX_CAPACITY) { + throw new IllegalArgumentException( + "Initial capacity " + initialCapacity + " exceeds maximum capacity of " + MAX_CAPACITY); + } allocate(initialCapacity); } @@ -162,6 +184,55 @@ public BytesToBytesMap( */ public int size() { return size; } + private static final class BytesToBytesMapIterator implements Iterator<Location> { + + private final int numRecords; + private final Iterator<MemoryBlock> dataPagesIterator; + private final Location loc; + + private int currentRecordNumber = 0; + private Object pageBaseObject; + private long offsetInPage; + + BytesToBytesMapIterator(int numRecords, Iterator<MemoryBlock> dataPagesIterator, Location loc) { + this.numRecords = numRecords; + this.dataPagesIterator = dataPagesIterator; + this.loc = loc; + if (dataPagesIterator.hasNext()) { + advanceToNextPage(); + } + } + + private void advanceToNextPage() { + final MemoryBlock currentPage = dataPagesIterator.next(); + pageBaseObject = currentPage.getBaseObject(); + offsetInPage = currentPage.getBaseOffset(); + } + + @Override + public boolean hasNext() { + return currentRecordNumber != numRecords; + } + + @Override + public Location next() { + int keyLength = (int) PlatformDependent.UNSAFE.getLong(pageBaseObject, offsetInPage); + if (keyLength == END_OF_PAGE_MARKER) { + advanceToNextPage(); + keyLength = (int) PlatformDependent.UNSAFE.getLong(pageBaseObject, offsetInPage); + } + loc.with(pageBaseObject, offsetInPage); + offsetInPage += 8 + 8 + keyLength + loc.getValueLength(); + currentRecordNumber++; + return loc; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + /** * Returns an iterator for iterating over the entries of this map. * @@ -171,27 +242,7 @@ public BytesToBytesMap( * `lookup()`, the behavior of the returned iterator is undefined. 
*/ public Iterator<Location> iterator() { - return new Iterator<Location>() { - - private int nextPos = bitset.nextSetBit(0); - - @Override - public boolean hasNext() { - return nextPos != -1; - } - - @Override - public Location next() { - final int pos = nextPos; - nextPos = bitset.nextSetBit(nextPos + 1); - return loc.with(pos, 0, true); - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; + return new BytesToBytesMapIterator(size, dataPages.iterator(), loc); } /** @@ -268,8 +319,11 @@ public final class Location { private int valueLength; private void updateAddressesAndSizes(long fullKeyAddress) { - final Object page = memoryManager.getPage(fullKeyAddress); - final long keyOffsetInPage = memoryManager.getOffsetInPage(fullKeyAddress); + updateAddressesAndSizes( + memoryManager.getPage(fullKeyAddress), memoryManager.getOffsetInPage(fullKeyAddress)); + } + + private void updateAddressesAndSizes(Object page, long keyOffsetInPage) { long position = keyOffsetInPage; keyLength = (int) PlatformDependent.UNSAFE.getLong(page, position); position += 8; // word used to store the key size @@ -291,6 +345,12 @@ Location with(int pos, int keyHashcode, boolean isDefined) { return this; } + Location with(Object page, long keyOffsetInPage) { + this.isDefined = true; + updateAddressesAndSizes(page, keyOffsetInPage); + return this; + } + /** * Returns true if the key is defined at this position, and false otherwise. */ @@ -345,6 +405,8 @@ public int getValueLength() { * <p> * It is only valid to call this method immediately after calling `lookup()` using the same key. * <p> + * The key and value must be word-aligned (that is, their sizes must multiples of 8). + * <p> * After calling this method, calls to `get[Key|Value]Address()` and `get[Key|Value]Length` * will return information on the data stored by this `putNewKey` call. * <p> @@ -370,17 +432,27 @@ public void putNewKey( isDefined = true; assert (keyLengthBytes % 8 == 0); assert (valueLengthBytes % 8 == 0); + if (size == MAX_CAPACITY) { + throw new IllegalStateException("BytesToBytesMap has reached maximum capacity"); + } // Here, we'll copy the data into our data pages. Because we only store a relative offset from // the key address instead of storing the absolute address of the value, the key and value // must be stored in the same memory page. // (8 byte key length) (key) (8 byte value length) (value) final long requiredSize = 8 + keyLengthBytes + 8 + valueLengthBytes; - assert(requiredSize <= PAGE_SIZE_BYTES); + assert (requiredSize <= PAGE_SIZE_BYTES - 8); // Reserve 8 bytes for the end-of-page marker. size++; bitset.set(pos); - // If there's not enough space in the current page, allocate a new page: - if (currentDataPage == null || PAGE_SIZE_BYTES - pageCursor < requiredSize) { + // If there's not enough space in the current page, allocate a new page (8 bytes are reserved + // for the end-of-page marker). 
+ if (currentDataPage == null || PAGE_SIZE_BYTES - 8 - pageCursor < requiredSize) { + if (currentDataPage != null) { + // There wasn't enough space in the current page, so write an end-of-page marker: + final Object pageBaseObject = currentDataPage.getBaseObject(); + final long lengthOffsetInPage = currentDataPage.getBaseOffset() + pageCursor; + PlatformDependent.UNSAFE.putLong(pageBaseObject, lengthOffsetInPage, END_OF_PAGE_MARKER); + } MemoryBlock newPage = memoryManager.allocatePage(PAGE_SIZE_BYTES); dataPages.add(newPage); pageCursor = 0; @@ -414,7 +486,7 @@ public void putNewKey( longArray.set(pos * 2 + 1, keyHashcode); updateAddressesAndSizes(storedKeyAddress); isDefined = true; - if (size > growthThreshold) { + if (size > growthThreshold && longArray.size() < MAX_CAPACITY) { growAndRehash(); } } @@ -427,8 +499,11 @@ public void putNewKey( * @param capacity the new map capacity */ private void allocate(int capacity) { - capacity = Math.max((int) Math.min(Integer.MAX_VALUE, nextPowerOf2(capacity)), 64); - longArray = new LongArray(memoryManager.allocate(capacity * 8 * 2)); + assert (capacity >= 0); + // The capacity needs to be divisible by 64 so that our bit set can be sized properly + capacity = Math.max((int) Math.min(MAX_CAPACITY, nextPowerOf2(capacity)), 64); + assert (capacity <= MAX_CAPACITY); + longArray = new LongArray(memoryManager.allocate(capacity * 8L * 2)); bitset = new BitSet(MemoryBlock.fromLongArray(new long[capacity / 64])); this.growthThreshold = (int) (capacity * loadFactor); @@ -494,10 +569,16 @@ public long getNumHashCollisions() { return numHashCollisions; } + @VisibleForTesting + int getNumDataPages() { + return dataPages.size(); + } + /** * Grows the size of the hash table and re-hash everything. */ - private void growAndRehash() { + @VisibleForTesting + void growAndRehash() { long resizeStartTime = -1; if (enablePerfMetrics) { resizeStartTime = System.nanoTime(); @@ -508,7 +589,7 @@ private void growAndRehash() { final int oldCapacity = (int) oldBitSet.capacity(); // Allocate the new data structures - allocate(Math.min(Integer.MAX_VALUE, growthStrategy.nextCapacity(oldCapacity))); + allocate(Math.min(growthStrategy.nextCapacity(oldCapacity), MAX_CAPACITY)); // Re-mask (we don't recompute the hashcode because we stored all 32 bits of it) for (int pos = oldBitSet.nextSetBit(0); pos >= 0; pos = oldBitSet.nextSetBit(pos + 1)) { diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java b/unsafe/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java index 7c321baffe82d..20654e4eeaa02 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java @@ -32,7 +32,9 @@ public interface HashMapGrowthStrategy { class Doubling implements HashMapGrowthStrategy { @Override public int nextCapacity(int currentCapacity) { - return currentCapacity * 2; + assert (currentCapacity > 0); + // Guard against overflow + return (currentCapacity * 2 > 0) ? 
(currentCapacity * 2) : Integer.MAX_VALUE; } } diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java index 2906ac8abad1a..10881969dbc78 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java @@ -44,7 +44,7 @@ * maximum size of a long[] array, allowing us to address 8192 * 2^32 * 8 bytes, which is * approximately 35 terabytes of memory. */ -public final class TaskMemoryManager { +public class TaskMemoryManager { private final Logger logger = LoggerFactory.getLogger(TaskMemoryManager.class); diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index 7a5c0622d1ffb..81315f7c94645 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -25,24 +25,40 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import static org.mockito.AdditionalMatchers.geq; +import static org.mockito.Mockito.*; import org.apache.spark.unsafe.array.ByteArrayMethods; +import org.apache.spark.unsafe.memory.*; import org.apache.spark.unsafe.PlatformDependent; import static org.apache.spark.unsafe.PlatformDependent.BYTE_ARRAY_OFFSET; -import org.apache.spark.unsafe.memory.ExecutorMemoryManager; -import org.apache.spark.unsafe.memory.MemoryAllocator; -import org.apache.spark.unsafe.memory.MemoryLocation; -import org.apache.spark.unsafe.memory.TaskMemoryManager; +import static org.apache.spark.unsafe.PlatformDependent.LONG_ARRAY_OFFSET; + public abstract class AbstractBytesToBytesMapSuite { private final Random rand = new Random(42); private TaskMemoryManager memoryManager; + private TaskMemoryManager sizeLimitedMemoryManager; @Before public void setup() { memoryManager = new TaskMemoryManager(new ExecutorMemoryManager(getMemoryAllocator())); + // Mocked memory manager for tests that check the maximum array size, since actually allocating + // such large arrays will cause us to run out of memory in our tests. 
+ sizeLimitedMemoryManager = spy(memoryManager); + when(sizeLimitedMemoryManager.allocate(geq(1L << 20))).thenAnswer(new Answer<MemoryBlock>() { + @Override + public MemoryBlock answer(InvocationOnMock invocation) throws Throwable { + if (((Long) invocation.getArguments()[0] / 8) > Integer.MAX_VALUE) { + throw new OutOfMemoryError("Requested array size exceeds VM limit"); + } + return memoryManager.allocate(1L << 20); + } + }); } @After @@ -101,6 +117,7 @@ public void emptyMap() { final int keyLengthInBytes = keyLengthInWords * 8; final byte[] key = getRandomByteArray(keyLengthInWords); Assert.assertFalse(map.lookup(key, BYTE_ARRAY_OFFSET, keyLengthInBytes).isDefined()); + Assert.assertFalse(map.iterator().hasNext()); } finally { map.free(); } @@ -159,7 +176,7 @@ public void setAndRetrieveAKey() { @Test public void iteratorTest() throws Exception { - final int size = 128; + final int size = 4096; BytesToBytesMap map = new BytesToBytesMap(memoryManager, size / 2); try { for (long i = 0; i < size; i++) { @@ -167,14 +184,26 @@ public void iteratorTest() throws Exception { final BytesToBytesMap.Location loc = map.lookup(value, PlatformDependent.LONG_ARRAY_OFFSET, 8); Assert.assertFalse(loc.isDefined()); - loc.putNewKey( - value, - PlatformDependent.LONG_ARRAY_OFFSET, - 8, - value, - PlatformDependent.LONG_ARRAY_OFFSET, - 8 - ); + // Ensure that we store some zero-length keys + if (i % 5 == 0) { + loc.putNewKey( + null, + PlatformDependent.LONG_ARRAY_OFFSET, + 0, + value, + PlatformDependent.LONG_ARRAY_OFFSET, + 8 + ); + } else { + loc.putNewKey( + value, + PlatformDependent.LONG_ARRAY_OFFSET, + 8, + value, + PlatformDependent.LONG_ARRAY_OFFSET, + 8 + ); + } } final java.util.BitSet valuesSeen = new java.util.BitSet(size); final Iterator<BytesToBytesMap.Location> iter = map.iterator(); @@ -183,11 +212,16 @@ public void iteratorTest() throws Exception { Assert.assertTrue(loc.isDefined()); final MemoryLocation keyAddress = loc.getKeyAddress(); final MemoryLocation valueAddress = loc.getValueAddress(); - final long key = PlatformDependent.UNSAFE.getLong( - keyAddress.getBaseObject(), keyAddress.getBaseOffset()); final long value = PlatformDependent.UNSAFE.getLong( valueAddress.getBaseObject(), valueAddress.getBaseOffset()); - Assert.assertEquals(key, value); + final long keyLength = loc.getKeyLength(); + if (keyLength == 0) { + Assert.assertTrue("value " + value + " was not divisible by 5", value % 5 == 0); + } else { + final long key = PlatformDependent.UNSAFE.getLong( + keyAddress.getBaseObject(), keyAddress.getBaseOffset()); + Assert.assertEquals(value, key); + } valuesSeen.set((int) value); } Assert.assertEquals(size, valuesSeen.cardinality()); @@ -196,6 +230,74 @@ public void iteratorTest() throws Exception { } } + @Test + public void iteratingOverDataPagesWithWastedSpace() throws Exception { + final int NUM_ENTRIES = 1000 * 1000; + final int KEY_LENGTH = 16; + final int VALUE_LENGTH = 40; + final BytesToBytesMap map = new BytesToBytesMap(memoryManager, NUM_ENTRIES); + // Each record will take 8 + 8 + 16 + 40 = 72 bytes of space in the data page. Our 64-megabyte + // pages won't be evenly-divisible by records of this size, which will cause us to waste some + // space at the end of the page. This is necessary in order for us to take the end-of-record + // handling branch in iterator(). 
+ try { + for (int i = 0; i < NUM_ENTRIES; i++) { + final long[] key = new long[] { i, i }; // 2 * 8 = 16 bytes + final long[] value = new long[] { i, i, i, i, i }; // 5 * 8 = 40 bytes + final BytesToBytesMap.Location loc = map.lookup( + key, + LONG_ARRAY_OFFSET, + KEY_LENGTH + ); + Assert.assertFalse(loc.isDefined()); + loc.putNewKey( + key, + LONG_ARRAY_OFFSET, + KEY_LENGTH, + value, + LONG_ARRAY_OFFSET, + VALUE_LENGTH + ); + } + Assert.assertEquals(2, map.getNumDataPages()); + + final java.util.BitSet valuesSeen = new java.util.BitSet(NUM_ENTRIES); + final Iterator<BytesToBytesMap.Location> iter = map.iterator(); + final long key[] = new long[KEY_LENGTH / 8]; + final long value[] = new long[VALUE_LENGTH / 8]; + while (iter.hasNext()) { + final BytesToBytesMap.Location loc = iter.next(); + Assert.assertTrue(loc.isDefined()); + Assert.assertEquals(KEY_LENGTH, loc.getKeyLength()); + Assert.assertEquals(VALUE_LENGTH, loc.getValueLength()); + PlatformDependent.copyMemory( + loc.getKeyAddress().getBaseObject(), + loc.getKeyAddress().getBaseOffset(), + key, + LONG_ARRAY_OFFSET, + KEY_LENGTH + ); + PlatformDependent.copyMemory( + loc.getValueAddress().getBaseObject(), + loc.getValueAddress().getBaseOffset(), + value, + LONG_ARRAY_OFFSET, + VALUE_LENGTH + ); + for (long j : key) { + Assert.assertEquals(key[0], j); + } + for (long j : value) { + Assert.assertEquals(key[0], j); + } + valuesSeen.set((int) key[0]); + } + Assert.assertEquals(NUM_ENTRIES, valuesSeen.cardinality()); + } finally { + map.free(); + } + } + @Test public void randomizedStressTest() { final int size = 65536; @@ -247,4 +349,35 @@ public void randomizedStressTest() { map.free(); } } + + @Test + public void initialCapacityBoundsChecking() { + try { + new BytesToBytesMap(sizeLimitedMemoryManager, 0); + Assert.fail("Expected IllegalArgumentException to be thrown"); + } catch (IllegalArgumentException e) { + // expected exception + } + + try { + new BytesToBytesMap(sizeLimitedMemoryManager, BytesToBytesMap.MAX_CAPACITY + 1); + Assert.fail("Expected IllegalArgumentException to be thrown"); + } catch (IllegalArgumentException e) { + // expected exception + } + + // Can allocate _at_ the max capacity + BytesToBytesMap map = + new BytesToBytesMap(sizeLimitedMemoryManager, BytesToBytesMap.MAX_CAPACITY); + map.free(); + } + + @Test + public void resizingLargeMap() { + // As long as a map's capacity is below the max, we should be able to resize up to the max + BytesToBytesMap map = + new BytesToBytesMap(sizeLimitedMemoryManager, BytesToBytesMap.MAX_CAPACITY - 64); + map.growAndRehash(); + map.free(); + } } From c330e52dae6a3ec7e67ca82e2c2f4ea873976458 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Wed, 20 May 2015 17:26:26 -0700 Subject: [PATCH 284/320] [SPARK-7762] [MLLIB] set default value for outputCol Set a default value for `outputCol` instead of forcing users to name it. This is useful for intermediate transformers in the pipeline. 
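For illustration, a minimal sketch of the new behavior (the Binarizer transformer and the printed uid below are only examples; the shared param simply falls back to uid + "__output" when no output column is set):

```scala
import org.apache.spark.ml.feature.Binarizer

// No setOutputCol call: the shared outputCol param now defaults to uid + "__output".
val binarizer = new Binarizer()
  .setInputCol("feature")
  .setThreshold(0.5)

// Prints something like "binarizer_5d3a07a1b0c2__output" instead of requiring the
// caller to name the intermediate column explicitly.
println(binarizer.getOutputCol)
```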
jkbradley Author: Xiangrui Meng <meng@databricks.com> Closes #6289 from mengxr/SPARK-7762 and squashes the following commits: 54edebc [Xiangrui Meng] merge master bff8667 [Xiangrui Meng] update unit test 171246b [Xiangrui Meng] add unit test for outputCol a4321bd [Xiangrui Meng] set default value for outputCol --- .../ml/param/shared/SharedParamsCodeGen.scala | 2 +- .../spark/ml/param/shared/sharedParams.scala | 4 ++- .../ml/param/shared/SharedParamsSuite.scala | 35 +++++++++++++++++++ .../ml/param/_shared_params_code_gen.py | 2 +- python/pyspark/ml/param/shared.py | 3 +- 5 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 mllib/src/test/scala/org/apache/spark/ml/param/shared/SharedParamsSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 8b8cb81373a65..1ffb5eddc36bd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -49,7 +49,7 @@ private[shared] object SharedParamsCodeGen { isValid = "ParamValidators.inRange(0, 1)"), ParamDesc[String]("inputCol", "input column name"), ParamDesc[Array[String]]("inputCols", "input column names"), - ParamDesc[String]("outputCol", "output column name"), + ParamDesc[String]("outputCol", "output column name", Some("uid + \"__output\"")), ParamDesc[Int]("checkpointInterval", "checkpoint interval (>= 1)", isValid = "ParamValidators.gtEq(1)"), ParamDesc[Boolean]("fitIntercept", "whether to fit an intercept term", Some("true")), diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 3a4976d3ddcd1..ed08417bd4df8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -185,7 +185,7 @@ private[ml] trait HasInputCols extends Params { } /** - * (private[ml]) Trait for shared param outputCol. + * (private[ml]) Trait for shared param outputCol (default: uid + "__output"). */ private[ml] trait HasOutputCol extends Params { @@ -195,6 +195,8 @@ private[ml] trait HasOutputCol extends Params { */ final val outputCol: Param[String] = new Param[String](this, "outputCol", "output column name") + setDefault(outputCol, uid + "__output") + /** @group getParam */ final def getOutputCol: String = $(outputCol) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/shared/SharedParamsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/param/shared/SharedParamsSuite.scala new file mode 100644 index 0000000000000..ca18fa1ad3c15 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/param/shared/SharedParamsSuite.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.param.shared + +import org.scalatest.FunSuite + +import org.apache.spark.ml.param.Params + +class SharedParamsSuite extends FunSuite { + + test("outputCol") { + + class Obj(override val uid: String) extends Params with HasOutputCol + + val obj = new Obj("obj") + + assert(obj.hasDefault(obj.outputCol)) + assert(obj.getOrDefault(obj.outputCol) === "obj__output") + } +} diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index ccb929af184b8..69efc424ec4ef 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -116,7 +116,7 @@ def get$Name(self): ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", "'rawPrediction'"), ("inputCol", "input column name", None), ("inputCols", "input column names", None), - ("outputCol", "output column name", None), + ("outputCol", "output column name", "self.uid + '__output'"), ("numFeatures", "number of features", None), ("checkpointInterval", "checkpoint interval (>= 1)", None), ("seed", "random seed", "hash(type(self).__name__)"), diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 0b93788899124..bc088e4c29e26 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -280,6 +280,7 @@ def __init__(self): super(HasOutputCol, self).__init__() #: param for output column name self.outputCol = Param(self, "outputCol", "output column name") + self._setDefault(outputCol=self.uid + '__output') def setOutputCol(self, value): """ @@ -459,7 +460,7 @@ def __init__(self): self.maxMemoryInMB = Param(self, "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation.") #: param for If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees. self.cacheNodeIds = Param(self, "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees.") - + def setMaxDepth(self, value): """ Sets the value of :py:attr:`maxDepth`. From 5196efff53af4965ff216a9d5c0f8b2b4fc98652 Mon Sep 17 00:00:00 2001 From: Josh Rosen <joshrosen@databricks.com> Date: Wed, 20 May 2015 17:52:50 -0700 Subject: [PATCH 285/320] [SPARK-7719] Re-add UnsafeShuffleWriterSuite test that was removed for Java 6 compat This patch re-adds a test which was removed in 9ebb44f8abb1a13f045eed60190954db904ffef7 due to a Java 6 compatibility issue. We now use Guava's `Iterators.emptyIterator()` in place of `Collections.emptyIterator()`, which isn't present in all Java 6 versions. 
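A minimal sketch of the substitution, shown in Scala for brevity (Guava's `Iterators.emptyIterator()` works on Java 6, whereas `java.util.Collections.emptyIterator()` was only added in Java 7):

```scala
import com.google.common.collect.Iterators

// Java 6-safe empty iterator, used in place of java.util.Collections.emptyIterator().
val empty: java.util.Iterator[String] = Iterators.emptyIterator[String]()
assert(!empty.hasNext)
```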
Author: Josh Rosen <joshrosen@databricks.com> Closes #6298 from JoshRosen/SPARK-7719-fix-java-6-test-code and squashes the following commits: 5c9bd85 [Josh Rosen] Re-add UnsafeShuffleWriterSuite.emptyIterator() test which was removed due to Java 6 issue --- .../shuffle/unsafe/UnsafeShuffleWriterSuite.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java index 03116d8fc2b21..83d109115aa5c 100644 --- a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java @@ -26,6 +26,7 @@ import scala.reflect.ClassTag; import scala.runtime.AbstractFunction1; +import com.google.common.collect.Iterators; import com.google.common.collect.HashMultiset; import com.google.common.io.ByteStreams; import org.junit.After; @@ -252,6 +253,20 @@ public void doNotNeedToCallWriteBeforeUnsuccessfulStop() throws IOException { createWriter(false).stop(false); } + @Test + public void writeEmptyIterator() throws Exception { + final UnsafeShuffleWriter<Object, Object> writer = createWriter(true); + writer.write(Iterators.<Product2<Object, Object>>emptyIterator()); + final Option<MapStatus> mapStatus = writer.stop(true); + assertTrue(mapStatus.isDefined()); + assertTrue(mergedOutputFile.exists()); + assertArrayEquals(new long[NUM_PARTITITONS], partitionSizesInMergedFile); + assertEquals(0, taskMetrics.shuffleWriteMetrics().get().shuffleRecordsWritten()); + assertEquals(0, taskMetrics.shuffleWriteMetrics().get().shuffleBytesWritten()); + assertEquals(0, taskMetrics.diskBytesSpilled()); + assertEquals(0, taskMetrics.memoryBytesSpilled()); + } + @Test public void writeWithoutSpilling() throws Exception { // In this example, each partition should have exactly one record: From a70bf06b790add5f279a69607df89ed36155b0e4 Mon Sep 17 00:00:00 2001 From: Hari Shreedharan <hshreedharan@apache.org> Date: Wed, 20 May 2015 21:13:10 -0500 Subject: [PATCH 286/320] =?UTF-8?q?[SPARK-7750]=20[WEBUI]=20Rename=20endpo?= =?UTF-8?q?ints=20from=20`json`=20to=20`api`=20to=20allow=20fu=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …rther extension to non-json outputs too. Author: Hari Shreedharan <hshreedharan@apache.org> Closes #6273 from harishreedharan/json-to-api and squashes the following commits: e14b73b [Hari Shreedharan] Rename `getJsonServlet` to `getServletHandler` i 42f8acb [Hari Shreedharan] Import order fixes. 2ef852f [Hari Shreedharan] [SPARK-7750][WebUI] Rename endpoints from `json` to `api` to allow further extension to non-json outputs too. 
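As a quick illustration of the renamed mount point (the localhost URL and port below assume an application running with the default UI settings; they are not part of this patch):

```scala
import scala.io.Source

// Endpoints formerly served under /json/v1 are now available under /api/v1.
val apps = Source.fromURL("http://localhost:4040/api/v1/applications").mkString
println(apps)
```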
--- .../spark/deploy/history/HistoryServer.scala | 5 +++-- .../spark/deploy/master/ui/MasterWebUI.scala | 5 +++-- ...nRootResource.scala => ApiRootResource.scala} | 8 ++++---- .../main/scala/org/apache/spark/ui/SparkUI.scala | 5 +++-- .../deploy/history/HistoryServerSuite.scala | 4 ++-- .../org/apache/spark/ui/UISeleniumSuite.scala | 16 ++++++++-------- docs/monitoring.md | 10 +++++----- 7 files changed, 28 insertions(+), 25 deletions(-) rename core/src/main/scala/org/apache/spark/status/api/v1/{JsonRootResource.scala => ApiRootResource.scala} (97%) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 517cbe5176241..5a0eb585a9049 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -25,7 +25,8 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.status.api.v1.{ApplicationInfo, ApplicationsListResource, JsonRootResource, UIRoot} +import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, ApplicationsListResource, + UIRoot} import org.apache.spark.ui.{SparkUI, UIUtils, WebUI} import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.{SignalLogger, Utils} @@ -125,7 +126,7 @@ class HistoryServer( def initialize() { attachPage(new HistoryPage(this)) - attachHandler(JsonRootResource.getJsonServlet(this)) + attachHandler(ApiRootResource.getServletHandler(this)) attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static")) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index eb26e9f99c70b..2111a8581f2e4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -19,7 +19,8 @@ package org.apache.spark.deploy.master.ui import org.apache.spark.Logging import org.apache.spark.deploy.master.Master -import org.apache.spark.status.api.v1.{ApplicationsListResource, ApplicationInfo, JsonRootResource, UIRoot} +import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationsListResource, ApplicationInfo, + UIRoot} import org.apache.spark.ui.{SparkUI, WebUI} import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.RpcUtils @@ -47,7 +48,7 @@ class MasterWebUI(val master: Master, requestedPort: Int) attachPage(new HistoryNotFoundPage(this)) attachPage(masterPage) attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static")) - attachHandler(JsonRootResource.getJsonServlet(this)) + attachHandler(ApiRootResource.getServletHandler(this)) attachHandler(createRedirectHandler( "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST"))) attachHandler(createRedirectHandler( diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala similarity index 97% rename from core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala rename to core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala index c3ec45f54681b..bf2cc2e72f1fe 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/JsonRootResource.scala +++ 
b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala @@ -39,7 +39,7 @@ import org.apache.spark.ui.SparkUI * HistoryServerSuite. */ @Path("/v1") -private[v1] class JsonRootResource extends UIRootFromServletContext { +private[v1] class ApiRootResource extends UIRootFromServletContext { @Path("applications") def getApplicationList(): ApplicationListResource = { @@ -166,11 +166,11 @@ private[v1] class JsonRootResource extends UIRootFromServletContext { } -private[spark] object JsonRootResource { +private[spark] object ApiRootResource { - def getJsonServlet(uiRoot: UIRoot): ServletContextHandler = { + def getServletHandler(uiRoot: UIRoot): ServletContextHandler = { val jerseyContext = new ServletContextHandler(ServletContextHandler.NO_SESSIONS) - jerseyContext.setContextPath("/json") + jerseyContext.setContextPath("/api") val holder:ServletHolder = new ServletHolder(classOf[ServletContainer]) holder.setInitParameter("com.sun.jersey.config.property.resourceConfigClass", "com.sun.jersey.api.core.PackagesResourceConfig") diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index bfe4a180e8a6f..0b11e914bb251 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -19,7 +19,8 @@ package org.apache.spark.ui import java.util.Date -import org.apache.spark.status.api.v1.{ApplicationAttemptInfo, ApplicationInfo, JsonRootResource, UIRoot} +import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationAttemptInfo, ApplicationInfo, + UIRoot} import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext} import org.apache.spark.scheduler._ import org.apache.spark.storage.StorageStatusListener @@ -64,7 +65,7 @@ private[spark] class SparkUI private ( attachTab(new ExecutorsTab(this)) attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static")) attachHandler(createRedirectHandler("/", "/jobs", basePath = basePath)) - attachHandler(JsonRootResource.getJsonServlet(this)) + attachHandler(ApiRootResource.getServletHandler(this)) // This should be POST only, but, the YARN AM proxy won't proxy POSTs attachHandler(createRedirectHandler( "/stages/stage/kill", "/stages", stagesTab.handleKillRequest, diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 318ab5dbc4804..4adb5122bcf1a 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -198,11 +198,11 @@ class HistoryServerSuite extends FunSuite with BeforeAndAfter with Matchers with } def getContentAndCode(path: String, port: Int = port): (Int, Option[String], Option[String]) = { - HistoryServerSuite.getContentAndCode(new URL(s"http://localhost:$port/json/v1/$path")) + HistoryServerSuite.getContentAndCode(new URL(s"http://localhost:$port/api/v1/$path")) } def getUrl(path: String): String = { - HistoryServerSuite.getUrl(new URL(s"http://localhost:$port/json/v1/$path")) + HistoryServerSuite.getUrl(new URL(s"http://localhost:$port/api/v1/$path")) } def generateExpectation(name: String, path: String): Unit = { diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index 117b2c3960820..b6f5accef0cef 100644 --- 
a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -497,7 +497,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before goToUi(sc, "/jobs/job/?id=7") find("no-info").get.text should be ("No information to display for job 7") - val badJob = HistoryServerSuite.getContentAndCode(jsonUrl(sc.ui.get, "jobs/7")) + val badJob = HistoryServerSuite.getContentAndCode(apiUrl(sc.ui.get, "jobs/7")) badJob._1 should be (HttpServletResponse.SC_NOT_FOUND) badJob._2 should be (None) badJob._3 should be (Some("unknown job: 7")) @@ -540,18 +540,18 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before goToUi(sc, "/stages/stage/?id=12&attempt=0") find("no-info").get.text should be ("No information to display for Stage 12 (Attempt 0)") - val badStage = HistoryServerSuite.getContentAndCode(jsonUrl(sc.ui.get,"stages/12/0")) + val badStage = HistoryServerSuite.getContentAndCode(apiUrl(sc.ui.get,"stages/12/0")) badStage._1 should be (HttpServletResponse.SC_NOT_FOUND) badStage._2 should be (None) badStage._3 should be (Some("unknown stage: 12")) - val badAttempt = HistoryServerSuite.getContentAndCode(jsonUrl(sc.ui.get,"stages/19/15")) + val badAttempt = HistoryServerSuite.getContentAndCode(apiUrl(sc.ui.get,"stages/19/15")) badAttempt._1 should be (HttpServletResponse.SC_NOT_FOUND) badAttempt._2 should be (None) badAttempt._3 should be (Some("unknown attempt for stage 19. Found attempts: [0]")) val badStageAttemptList = HistoryServerSuite.getContentAndCode( - jsonUrl(sc.ui.get, "stages/12")) + apiUrl(sc.ui.get, "stages/12")) badStageAttemptList._1 should be (HttpServletResponse.SC_NOT_FOUND) badStageAttemptList._2 should be (None) badStageAttemptList._3 should be (Some("unknown stage: 12")) @@ -561,7 +561,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before test("live UI json application list") { withSpark(newSparkContext()) { sc => val appListRawJson = HistoryServerSuite.getUrl(new URL( - sc.ui.get.appUIAddress + "/json/v1/applications")) + sc.ui.get.appUIAddress + "/api/v1/applications")) val appListJsonAst = JsonMethods.parse(appListRawJson) appListJsonAst.children.length should be (1) val attempts = (appListJsonAst \ "attempts").children @@ -587,10 +587,10 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } def getJson(ui: SparkUI, path: String): JValue = { - JsonMethods.parse(HistoryServerSuite.getUrl(jsonUrl(ui, path))) + JsonMethods.parse(HistoryServerSuite.getUrl(apiUrl(ui, path))) } - def jsonUrl(ui: SparkUI, path: String): URL = { - new URL(ui.appUIAddress + "/json/v1/applications/test/" + path) + def apiUrl(ui: SparkUI, path: String): URL = { + new URL(ui.appUIAddress + "/api/v1/applications/test/" + path) } } diff --git a/docs/monitoring.md b/docs/monitoring.md index 1e0fc150862fb..e75018499003a 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -178,9 +178,9 @@ Note that the history server only displays completed Spark jobs. One way to sign In addition to viewing the metrics in the UI, they are also available as JSON. This gives developers an easy way to create new visualizations and monitoring tools for Spark. The JSON is available for -both running applications, and in the history server. The endpoints are mounted at `/json/v1`. 
Eg., -for the history server, they would typically be accessible at `http://<server-url>:18080/json/v1`, and -for a running application, at `http://localhost:4040/json/v1`. +both running applications, and in the history server. The endpoints are mounted at `/api/v1`. Eg., +for the history server, they would typically be accessible at `http://<server-url>:18080/api/v1`, and +for a running application, at `http://localhost:4040/api/v1`. <table class="table"> <tr><th>Endpoint</th><th>Meaning</th></tr> @@ -240,12 +240,12 @@ These endpoints have been strongly versioned to make it easier to develop applic * Individual fields will never be removed for any given endpoint * New endpoints may be added * New fields may be added to existing endpoints -* New versions of the api may be added in the future at a separate endpoint (eg., `json/v2`). New versions are *not* required to be backwards compatible. +* New versions of the api may be added in the future at a separate endpoint (eg., `api/v2`). New versions are *not* required to be backwards compatible. * Api versions may be dropped, but only after at least one minor release of co-existing with a new api version Note that even when examining the UI of a running applications, the `applications/[app-id]` portion is still required, though there is only one application available. Eg. to see the list of jobs for the -running app, you would go to `http://localhost:4040/json/v1/applications/[app-id]/jobs`. This is to +running app, you would go to `http://localhost:4040/api/v1/applications/[app-id]/jobs`. This is to keep the paths consistent in both modes. # Metrics From 895baf8f77e630ce32b0e25b00bf5ee45d17398f Mon Sep 17 00:00:00 2001 From: zsxwing <zsxwing@gmail.com> Date: Wed, 20 May 2015 19:56:01 -0700 Subject: [PATCH 287/320] [SPARK-7777] [STREAMING] Fix the flaky test in org.apache.spark.streaming.BasicOperationsSuite Just added a guard to make sure a batch has completed before moving to the next batch. 
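A self-contained sketch of the guard pattern described above (the polling helper and counter are illustrative stand-ins for the BatchCounter used in the real suite, not the actual test code):

```scala
import java.util.concurrent.atomic.AtomicInteger

// Poll a condition until it holds or the timeout expires; return whether it held.
def waitUntil(condition: () => Boolean, timeoutMs: Long): Boolean = {
  val deadline = System.currentTimeMillis() + timeoutMs
  while (!condition() && System.currentTimeMillis() < deadline) {
    Thread.sleep(10)
  }
  condition()
}

val completedBatches = new AtomicInteger(0)
// Simulate a batch finishing asynchronously; in the real suite this is driven by a
// streaming listener after the manual clock is advanced.
new Thread(new Runnable {
  override def run(): Unit = { Thread.sleep(100); completedBatches.incrementAndGet() }
}).start()

val before = completedBatches.get()
assert(waitUntil(() => completedBatches.get() > before, 5000),
  "Batch took more than 5 seconds to complete")
```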
Author: zsxwing <zsxwing@gmail.com> Closes #6306 from zsxwing/SPARK-7777 and squashes the following commits: ecee529 [zsxwing] Fix the failure message 58634fe [zsxwing] Fix the flaky test in org.apache.spark.streaming.BasicOperationsSuite --- .../org/apache/spark/streaming/BasicOperationsSuite.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala index 87bc20f79c3cd..f269cb74e0c2b 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala @@ -557,6 +557,9 @@ class BasicOperationsSuite extends TestSuiteBase { withTestServer(new TestServer()) { testServer => withStreamingContext(new StreamingContext(conf, batchDuration)) { ssc => testServer.start() + + val batchCounter = new BatchCounter(ssc) + // Set up the streaming context and input streams val networkStream = ssc.socketTextStream("localhost", testServer.port, StorageLevel.MEMORY_AND_DISK) @@ -587,7 +590,11 @@ class BasicOperationsSuite extends TestSuiteBase { for (i <- 0 until input.size) { testServer.send(input(i).toString + "\n") Thread.sleep(200) + val numCompletedBatches = batchCounter.getNumCompletedBatches clock.advance(batchDuration.milliseconds) + if (!batchCounter.waitUntilBatchesCompleted(numCompletedBatches + 1, 5000)) { + fail("Batch took more than 5 seconds to complete") + } collectRddInfo() } From 42c592adb381ff20832cce55e0849ed68dd7eee4 Mon Sep 17 00:00:00 2001 From: Cheng Hao <hao.cheng@intel.com> Date: Wed, 20 May 2015 19:58:22 -0700 Subject: [PATCH 288/320] [SPARK-7320] [SQL] Add Cube / Rollup for dataframe This is a follow up for #6257, which broke the maven test. 
Add cube & rollup for DataFrame For example: ```scala testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")) testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")) ``` Author: Cheng Hao <hao.cheng@intel.com> Closes #6304 from chenghao-intel/rollup and squashes the following commits: 04bb1de [Cheng Hao] move the table register/unregister into beforeAll/afterAll a6069f1 [Cheng Hao] cancel the implicit keyword ced4b8f [Cheng Hao] remove the unnecessary code changes 9959dfa [Cheng Hao] update the code as comments e1d88aa [Cheng Hao] update the code as suggested 03bc3d9 [Cheng Hao] Remove the CubedData & RollupedData 5fd62d0 [Cheng Hao] hiden the CubedData & RollupedData 5ffb196 [Cheng Hao] Add Cube / Rollup for dataframe --- .../org/apache/spark/sql/DataFrame.scala | 104 +++++++++++++++++- .../org/apache/spark/sql/GroupedData.scala | 92 +++++++++++----- .../hive/HiveDataFrameAnalyticsSuite.scala | 69 ++++++++++++ 3 files changed, 237 insertions(+), 28 deletions(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index adad85806d1ea..d78b4c2f8909c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -685,7 +685,53 @@ class DataFrame private[sql]( * @since 1.3.0 */ @scala.annotation.varargs - def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr)) + def groupBy(cols: Column*): GroupedData = { + GroupedData(this, cols.map(_.expr), GroupedData.GroupByType) + } + + /** + * Create a multi-dimensional rollup for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * {{{ + * // Compute the average for all numeric columns rolluped by department and group. + * df.rollup($"department", $"group").avg() + * + * // Compute the max age and average salary, rolluped by department and gender. + * df.rollup($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def rollup(cols: Column*): GroupedData = { + GroupedData(this, cols.map(_.expr), GroupedData.RollupType) + } + + /** + * Create a multi-dimensional cube for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * {{{ + * // Compute the average for all numeric columns cubed by department and group. + * df.cube($"department", $"group").avg() + * + * // Compute the max age and average salary, cubed by department and gender. + * df.cube($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def cube(cols: Column*): GroupedData = GroupedData(this, cols.map(_.expr), GroupedData.CubeType) /** * Groups the [[DataFrame]] using the specified columns, so we can run aggregation on them. 
@@ -710,7 +756,61 @@ class DataFrame private[sql]( @scala.annotation.varargs def groupBy(col1: String, cols: String*): GroupedData = { val colNames: Seq[String] = col1 +: cols - new GroupedData(this, colNames.map(colName => resolve(colName))) + GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.GroupByType) + } + + /** + * Create a multi-dimensional rollup for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * This is a variant of rollup that can only group by existing columns using column names + * (i.e. cannot construct expressions). + * + * {{{ + * // Compute the average for all numeric columns rolluped by department and group. + * df.rollup("department", "group").avg() + * + * // Compute the max age and average salary, rolluped by department and gender. + * df.rollup($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def rollup(col1: String, cols: String*): GroupedData = { + val colNames: Seq[String] = col1 +: cols + GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.RollupType) + } + + /** + * Create a multi-dimensional cube for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * This is a variant of cube that can only group by existing columns using column names + * (i.e. cannot construct expressions). + * + * {{{ + * // Compute the average for all numeric columns cubed by department and group. + * df.cube("department", "group").avg() + * + * // Compute the max age and average salary, cubed by department and gender. 
+ * df.cube($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def cube(col1: String, cols: String*): GroupedData = { + val colNames: Seq[String] = col1 +: cols + GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.CubeType) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 1381b9f1a6080..f730e4ae00e2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -23,9 +23,40 @@ import scala.language.implicitConversions import org.apache.spark.annotation.Experimental import org.apache.spark.sql.catalyst.analysis.Star import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.Aggregate +import org.apache.spark.sql.catalyst.plans.logical.{Rollup, Cube, Aggregate} import org.apache.spark.sql.types.NumericType +/** + * Companion object for GroupedData + */ +private[sql] object GroupedData { + def apply( + df: DataFrame, + groupingExprs: Seq[Expression], + groupType: GroupType): GroupedData = { + new GroupedData(df, groupingExprs, groupType: GroupType) + } + + /** + * The Grouping Type + */ + trait GroupType + + /** + * To indicate it's the GroupBy + */ + object GroupByType extends GroupType + + /** + * To indicate it's the CUBE + */ + object CubeType extends GroupType + + /** + * To indicate it's the ROLLUP + */ + object RollupType extends GroupType +} /** * :: Experimental :: @@ -34,19 +65,37 @@ import org.apache.spark.sql.types.NumericType * @since 1.3.0 */ @Experimental -class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) { +class GroupedData protected[sql]( + df: DataFrame, + groupingExprs: Seq[Expression], + private val groupType: GroupedData.GroupType) { - private[sql] implicit def toDF(aggExprs: Seq[NamedExpression]): DataFrame = { - val namedGroupingExprs = groupingExprs.map { - case expr: NamedExpression => expr - case expr: Expression => Alias(expr, expr.prettyString)() + private[this] def toDF(aggExprs: Seq[NamedExpression]): DataFrame = { + val aggregates = if (df.sqlContext.conf.dataFrameRetainGroupColumns) { + val retainedExprs = groupingExprs.map { + case expr: NamedExpression => expr + case expr: Expression => Alias(expr, expr.prettyString)() + } + retainedExprs ++ aggExprs + } else { + aggExprs + } + + groupType match { + case GroupedData.GroupByType => + DataFrame( + df.sqlContext, Aggregate(groupingExprs, aggregates, df.logicalPlan)) + case GroupedData.RollupType => + DataFrame( + df.sqlContext, Rollup(groupingExprs, df.logicalPlan, aggregates)) + case GroupedData.CubeType => + DataFrame( + df.sqlContext, Cube(groupingExprs, df.logicalPlan, aggregates)) } - DataFrame( - df.sqlContext, Aggregate(groupingExprs, namedGroupingExprs ++ aggExprs, df.logicalPlan)) } private[this] def aggregateNumericColumns(colNames: String*)(f: Expression => Expression) - : Seq[NamedExpression] = { + : DataFrame = { val columnExprs = if (colNames.isEmpty) { // No columns specified. Use all numeric columns. 
@@ -63,10 +112,10 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) namedExpr } } - columnExprs.map { c => + toDF(columnExprs.map { c => val a = f(c) Alias(a, a.prettyString)() - } + }) } private[this] def strToExpr(expr: String): (Expression => Expression) = { @@ -119,10 +168,10 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = { - exprs.map { case (colName, expr) => + toDF(exprs.map { case (colName, expr) => val a = strToExpr(expr)(df(colName).expr) Alias(a, a.prettyString)() - }.toSeq + }.toSeq) } /** @@ -175,19 +224,10 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = { - val aggExprs = (expr +: exprs).map(_.expr).map { + toDF((expr +: exprs).map(_.expr).map { case expr: NamedExpression => expr case expr: Expression => Alias(expr, expr.prettyString)() - } - if (df.sqlContext.conf.dataFrameRetainGroupColumns) { - val retainedExprs = groupingExprs.map { - case expr: NamedExpression => expr - case expr: Expression => Alias(expr, expr.prettyString)() - } - DataFrame(df.sqlContext, Aggregate(groupingExprs, retainedExprs ++ aggExprs, df.logicalPlan)) - } else { - DataFrame(df.sqlContext, Aggregate(groupingExprs, aggExprs, df.logicalPlan)) - } + }) } /** @@ -196,7 +236,7 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * * @since 1.3.0 */ - def count(): DataFrame = Seq(Alias(Count(Literal(1)), "count")()) + def count(): DataFrame = toDF(Seq(Alias(Count(Literal(1)), "count")())) /** * Compute the average value for each numeric columns for each group. This is an alias for `avg`. @@ -256,5 +296,5 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) @scala.annotation.varargs def sum(colNames: String*): DataFrame = { aggregateNumericColumns(colNames:_*)(Sum) - } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala new file mode 100644 index 0000000000000..99de14660f676 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive.implicits._ +import org.scalatest.BeforeAndAfterAll + +case class TestData2Int(a: Int, b: Int) + +// TODO ideally we should put the test suite into the package `sql`, as +// `hive` package is optional in compiling, however, `SQLContext.sql` doesn't +// support the `cube` or `rollup` yet. +class HiveDataFrameAnalyticsSuite extends QueryTest with BeforeAndAfterAll { + val testData = + TestHive.sparkContext.parallelize( + TestData2Int(1, 2) :: + TestData2Int(2, 4) :: Nil).toDF() + + override def beforeAll() { + TestHive.registerDataFrameAsTable(testData, "mytable") + } + + override def afterAll(): Unit = { + TestHive.dropTempTable("mytable") + } + + test("rollup") { + checkAnswer( + testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")), + sql("select a + b, b, sum(a - b) from mytable group by a + b, b with rollup").collect() + ) + + checkAnswer( + testData.rollup("a", "b").agg(sum("b")), + sql("select a, b, sum(b) from mytable group by a, b with rollup").collect() + ) + } + + test("cube") { + checkAnswer( + testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")), + sql("select a + b, b, sum(a - b) from mytable group by a + b, b with cube").collect() + ) + + checkAnswer( + testData.cube("a", "b").agg(sum("b")), + sql("select a, b, sum(b) from mytable group by a, b with cube").collect() + ) + } +} From ddec173cba63df723cd94508121d8c06d8c153c6 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Wed, 20 May 2015 20:30:39 -0700 Subject: [PATCH 289/320] [SPARK-7774] [MLLIB] add sqlContext to MLlibTestSparkContext to simplify test suites that require a SQLContext. 
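A hedged sketch of what a suite can look like after this change (the suite name and toy data are made up; the point is that `sqlContext` now comes from MLlibTestSparkContext rather than being created in each `beforeAll`):

```scala
import org.scalatest.FunSuite

import org.apache.spark.mllib.util.MLlibTestSparkContext

class ExampleSuite extends FunSuite with MLlibTestSparkContext {

  test("uses the shared sqlContext") {
    // No local `new SQLContext(sc)` needed; the trait initializes it in beforeAll().
    val df = sqlContext.createDataFrame(Seq((1, "a"), (2, "b"))).toDF("id", "value")
    assert(df.count() === 2)
  }
}
```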
Author: Xiangrui Meng <meng@databricks.com> Closes #6303 from mengxr/SPARK-7774 and squashes the following commits: 0622b5a [Xiangrui Meng] update some other test suites e1f9b8d [Xiangrui Meng] add sqlContext to MLlibTestSparkContext --- .../ml/classification/LogisticRegressionSuite.scala | 4 +--- .../spark/ml/classification/OneVsRestSuite.scala | 7 +++---- .../org/apache/spark/ml/feature/BinarizerSuite.scala | 6 +----- .../org/apache/spark/ml/feature/BucketizerSuite.scala | 9 +-------- .../scala/org/apache/spark/ml/feature/IDFSuite.scala | 9 +-------- .../apache/spark/ml/feature/OneHotEncoderSuite.scala | 8 +------- .../spark/ml/feature/PolynomialExpansionSuite.scala | 11 ++--------- .../apache/spark/ml/feature/StringIndexerSuite.scala | 7 ------- .../org/apache/spark/ml/feature/TokenizerSuite.scala | 9 +-------- .../spark/ml/feature/VectorAssemblerSuite.scala | 9 +-------- .../apache/spark/ml/feature/VectorIndexerSuite.scala | 6 +----- .../org/apache/spark/ml/recommendation/ALSSuite.scala | 2 -- .../spark/ml/regression/LinearRegressionSuite.scala | 4 +--- .../spark/mllib/util/MLlibTestSparkContext.scala | 8 ++++++-- 14 files changed, 20 insertions(+), 79 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 97f9749cb4a9a..9f77d5f3efc55 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -23,18 +23,16 @@ import org.apache.spark.mllib.classification.LogisticRegressionSuite._ import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{DataFrame, Row, SQLContext} +import org.apache.spark.sql.{DataFrame, Row} class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ @transient var dataset: DataFrame = _ @transient var binaryDataset: DataFrame = _ private val eps: Double = 1e-5 override def beforeAll(): Unit = { super.beforeAll() - sqlContext = new SQLContext(sc) dataset = sqlContext.createDataFrame(generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42)) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala index 990cfb08af83b..770b56890fa45 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala @@ -21,24 +21,23 @@ import org.scalatest.FunSuite import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.util.MetadataUtils -import org.apache.spark.mllib.classification.LogisticRegressionSuite._ import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS +import org.apache.spark.mllib.classification.LogisticRegressionSuite._ import org.apache.spark.mllib.evaluation.MulticlassMetrics import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, SQLContext} +import org.apache.spark.sql.DataFrame class OneVsRestSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: 
SQLContext = _ @transient var dataset: DataFrame = _ @transient var rdd: RDD[LabeledPoint] = _ override def beforeAll(): Unit = { super.beforeAll() - sqlContext = new SQLContext(sc) + val nPoints = 1000 // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2. diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala index caf1b759593f3..8f6c6b39dc93b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala @@ -20,18 +20,14 @@ package org.apache.spark.ml.feature import org.scalatest.FunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{DataFrame, Row, SQLContext} - +import org.apache.spark.sql.{DataFrame, Row} class BinarizerSuite extends FunSuite with MLlibTestSparkContext { @transient var data: Array[Double] = _ - @transient var sqlContext: SQLContext = _ override def beforeAll(): Unit = { super.beforeAll() - sqlContext = new SQLContext(sc) data = Array(0.1, -0.5, 0.2, -0.3, 0.8, 0.7, -0.1, -0.4) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala index 20d2f3ac6696b..0391bd8427c2c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala @@ -25,17 +25,10 @@ import org.apache.spark.SparkException import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{DataFrame, Row, SQLContext} +import org.apache.spark.sql.{DataFrame, Row} class BucketizerSuite extends FunSuite with MLlibTestSparkContext { - @transient private var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } - test("Bucket continuous features, without -inf,inf") { // Check a set of valid feature values. 
val splits = Array(-0.5, 0.0, 0.5) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala index eaee3443c1f23..f85e85471617a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala @@ -22,17 +22,10 @@ import org.scalatest.FunSuite import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.sql.Row class IDFSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } - def scaleDataWithIDF(dataSet: Array[Vector], model: Vector): Array[Vector] = { dataSet.map { case data: DenseVector => diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala index 92ec407b98d69..056b9eda86bba 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala @@ -21,16 +21,10 @@ import org.scalatest.FunSuite import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{DataFrame, SQLContext} +import org.apache.spark.sql.DataFrame class OneHotEncoderSuite extends FunSuite with MLlibTestSparkContext { - private var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } def stringIndexed(): DataFrame = { val data = sc.parallelize(Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")), 2) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala index c1d64fba0aa8f..aa230ca073d5b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala @@ -18,22 +18,15 @@ package org.apache.spark.ml.feature import org.scalatest.FunSuite +import org.scalatest.exceptions.TestFailedException import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{Row, SQLContext} -import org.scalatest.exceptions.TestFailedException +import org.apache.spark.sql.Row class PolynomialExpansionSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } - test("Polynomial expansion with default parameter") { val data = Array( Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))), diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala index b6939e5870410..89c2fe45573aa 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala @@ -21,15 +21,8 @@ import org.scalatest.FunSuite 
import org.apache.spark.ml.attribute.{Attribute, NominalAttribute} import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.SQLContext class StringIndexerSuite extends FunSuite with MLlibTestSparkContext { - private var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } test("StringIndexer") { val data = sc.parallelize(Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")), 2) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala index d186ead8f542f..a46d08d65150f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala @@ -22,7 +22,7 @@ import scala.beans.BeanInfo import org.scalatest.FunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{DataFrame, Row, SQLContext} +import org.apache.spark.sql.{DataFrame, Row} @BeanInfo case class TokenizerTestData(rawText: String, wantedTokens: Array[String]) @@ -30,13 +30,6 @@ case class TokenizerTestData(rawText: String, wantedTokens: Array[String]) class RegexTokenizerSuite extends FunSuite with MLlibTestSparkContext { import org.apache.spark.ml.feature.RegexTokenizerSuite._ - @transient var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } - test("RegexTokenizer") { val tokenizer = new RegexTokenizer() .setInputCol("rawText") diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala index 0db27607bc274..d0cd62c5e4864 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala @@ -22,17 +22,10 @@ import org.scalatest.FunSuite import org.apache.spark.SparkException import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.sql.Row class VectorAssemblerSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } - test("assemble") { import org.apache.spark.ml.feature.VectorAssembler.assemble assert(assemble(0.0) === Vectors.sparse(1, Array.empty, Array.empty)) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala index 38dc83b1241cf..b11b029c6343e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala @@ -26,15 +26,12 @@ import org.apache.spark.ml.attribute._ import org.apache.spark.mllib.linalg.{SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, SQLContext} - +import org.apache.spark.sql.DataFrame class VectorIndexerSuite extends FunSuite with MLlibTestSparkContext { import VectorIndexerSuite.FeatureData - @transient var sqlContext: SQLContext = _ - // identical, of length 3 @transient var densePoints1: 
DataFrame = _ @transient var sparsePoints1: DataFrame = _ @@ -86,7 +83,6 @@ class VectorIndexerSuite extends FunSuite with MLlibTestSparkContext { checkPair(densePoints1Seq, sparsePoints1Seq) checkPair(densePoints2Seq, sparsePoints2Seq) - sqlContext = new SQLContext(sc) densePoints1 = sqlContext.createDataFrame(sc.parallelize(densePoints1Seq, 2).map(FeatureData)) sparsePoints1 = sqlContext.createDataFrame(sc.parallelize(sparsePoints1Seq, 2).map(FeatureData)) densePoints2 = sqlContext.createDataFrame(sc.parallelize(densePoints2Seq, 2).map(FeatureData)) diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index 6cc6ec94eb643..9a35555e52b90 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -38,14 +38,12 @@ import org.apache.spark.util.Utils class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { - private var sqlContext: SQLContext = _ private var tempDir: File = _ override def beforeAll(): Unit = { super.beforeAll() tempDir = Utils.createTempDir() sc.setCheckpointDir(tempDir.getAbsolutePath) - sqlContext = new SQLContext(sc) } override def afterAll(): Unit = { diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala index 80323ef5201a6..50a78631fa6d6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala @@ -22,11 +22,10 @@ import org.scalatest.FunSuite import org.apache.spark.mllib.linalg.DenseVector import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext} import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{Row, SQLContext, DataFrame} +import org.apache.spark.sql.{DataFrame, Row} class LinearRegressionSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ @transient var dataset: DataFrame = _ /** @@ -41,7 +40,6 @@ class LinearRegressionSuite extends FunSuite with MLlibTestSparkContext { */ override def beforeAll(): Unit = { super.beforeAll() - sqlContext = new SQLContext(sc) dataset = sqlContext.createDataFrame( sc.parallelize(LinearDataGenerator.generateLinearInput( 6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 10000, 42, 0.1), 2)) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala index b658889476d37..5d1796ef65722 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala @@ -17,13 +17,14 @@ package org.apache.spark.mllib.util -import org.scalatest.Suite -import org.scalatest.BeforeAndAfterAll +import org.scalatest.{BeforeAndAfterAll, Suite} import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.SQLContext trait MLlibTestSparkContext extends BeforeAndAfterAll { self: Suite => @transient var sc: SparkContext = _ + @transient var sqlContext: SQLContext = _ override def beforeAll() { super.beforeAll() @@ -31,12 +32,15 @@ trait MLlibTestSparkContext extends BeforeAndAfterAll { self: Suite => .setMaster("local[2]") .setAppName("MLlibUnitTest") sc = new SparkContext(conf) 
+ sqlContext = new SQLContext(sc) } override def afterAll() { + sqlContext = null if (sc != null) { sc.stop() } + sc = null super.afterAll() } } From d0eb9ffe978c663b7aa06e908cadee81767d23d1 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Wed, 20 May 2015 22:23:49 -0700 Subject: [PATCH 290/320] [SPARK-7746][SQL] Add FetchSize parameter for JDBC driver JIRA: https://issues.apache.org/jira/browse/SPARK-7746 Looks like an easy to add parameter but can show significant performance improvement if the JDBC driver accepts it. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #6283 from viirya/jdbc_fetchsize and squashes the following commits: de47f94 [Liang-Chi Hsieh] Don't keep fetchSize as single parameter. b7bff2f [Liang-Chi Hsieh] Add FetchSize parameter for JDBC driver. --- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 8 +++-- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 33 ++++++++++++++++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index f7b19096eaacb..be03a237b6c4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -211,7 +211,8 @@ private[sql] object JDBCRDD extends Logging { fqTable, requiredColumns, filters, - parts) + parts, + properties) } } @@ -227,7 +228,8 @@ private[sql] class JDBCRDD( fqTable: String, columns: Array[String], filters: Array[Filter], - partitions: Array[Partition]) + partitions: Array[Partition], + properties: Properties) extends RDD[Row](sc, Nil) { /** @@ -356,6 +358,8 @@ private[sql] class JDBCRDD( val sqlText = s"SELECT $columnList FROM $fqTable $myWhereClause" val stmt = conn.prepareStatement(sqlText, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) + val fetchSize = properties.getProperty("fetchSize", "0").toInt + stmt.setFetchSize(fetchSize) val rs = stmt.executeQuery() val conversions = getConversions(schema) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index a8dddfb9b6858..347f28351fd72 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -67,7 +67,15 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { |USING org.apache.spark.sql.jdbc |OPTIONS (url '$url', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass') """.stripMargin.replaceAll("\n", " ")) - + + sql( + s""" + |CREATE TEMPORARY TABLE fetchtwo + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass', + | fetchSize '2') + """.stripMargin.replaceAll("\n", " ")) + sql( s""" |CREATE TEMPORARY TABLE parts @@ -185,6 +193,14 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { assert(names(2).equals("mary")) } + test("SELECT first field when fetchSize is two") { + val names = sql("SELECT NAME FROM fetchtwo").collect().map(x => x.getString(0)).sortWith(_ < _) + assert(names.size === 3) + assert(names(0).equals("fred")) + assert(names(1).equals("joe 'foo' \"bar\"")) + assert(names(2).equals("mary")) + } + test("SELECT second field") { val ids = sql("SELECT THEID FROM foobar").collect().map(x => x.getInt(0)).sortWith(_ < _) assert(ids.size === 3) @@ -192,6 +208,14 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { assert(ids(1) === 2) 
     assert(ids(2) === 3)
   }
 
+  test("SELECT second field when fetchSize is two") {
+    val ids = sql("SELECT THEID FROM fetchtwo").collect().map(x => x.getInt(0)).sortWith(_ < _)
+    assert(ids.size === 3)
+    assert(ids(0) === 1)
+    assert(ids(1) === 2)
+    assert(ids(2) === 3)
+  }
 
   test("SELECT * partitioned") {
     assert(sql("SELECT * FROM parts").collect().size == 3)
@@ -232,6 +256,13 @@ class JDBCSuite extends FunSuite with BeforeAndAfter {
       urlWithUserAndPass, "TEST.PEOPLE", new Properties).collect().length === 3)
   }
 
+  test("Basic API with FetchSize") {
+    val properties = new Properties
+    properties.setProperty("fetchSize", "2")
+    assert(TestSQLContext.read.jdbc(
+      urlWithUserAndPass, "TEST.PEOPLE", properties).collect().length === 3)
+  }
+
   test("Partitioning via JDBCPartitioningInfo API") {
     assert(
       TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", "THEID", 0, 4, 3, new Properties)

From 04940c49755fd2e7f1ed7b875da287c946bfebeb Mon Sep 17 00:00:00 2001
From: Mingfei <mingfei.shi@intel.com>
Date: Wed, 20 May 2015 22:33:03 -0700
Subject: [PATCH 291/320] [SPARK-7389] [CORE] Tachyon integration improvement

Two main changes:

Add two functions, putValues and getValues, to ExternalBlockManager, because an
implementation may not want to rely on putBytes and getBytes.

Improve the Tachyon integration. Currently, when putting data into Tachyon, Spark first
serializes all the data in a partition into one ByteBuffer and then writes it to Tachyon,
which uses a lot of memory and increases GC overhead. When getting data from Tachyon,
getValues depends on getBytes, which likewise reads all the data into an on-heap byte
array, again driving up memory usage.

This PR changes the approach of the two functions, making them read and write data as
streams to reduce memory usage.

In our testing, when the data size is large, this patch reduces GC time by about 30% and
full GC time by about 70%, and total execution time by about 10%.

Author: Mingfei <mingfei.shi@intel.com>

Closes #5908 from shimingfei/Tachyon-integration-rebase and squashes the following commits:

033bc57 [Mingfei] modify accroding to comments
747c69a [Mingfei] modify according to comments - format changes
ce52c67 [Mingfei] put close() in a finally block
d2c60bb [Mingfei] modify according to comments, some code style change
4c11591 [Mingfei] modify according to comments
split putIntoExternalBlockStore into two functions
add default implementation for getValues and putValues
cc0a32e [Mingfei] Make getValues read data from Tachyon by stream
Make putValues write data to Tachyon by stream
017593d [Mingfei] add getValues and putValues in ExternalBlockManager's Interface
---
 .../apache/spark/storage/BlockManager.scala   | 36 +++++---
 .../spark/storage/ExternalBlockManager.scala  | 22 ++++-
 .../spark/storage/ExternalBlockStore.scala    | 88 +++++++++++++------
 .../spark/storage/TachyonBlockManager.scala   | 51 +++++++++--
 4 files changed, 149 insertions(+), 48 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
index 16d67cbfca80b..5048c7dab240b 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
@@ -17,7 +17,7 @@ package org.apache.spark.storage
 
-import java.io.{BufferedOutputStream, ByteArrayOutputStream, File, InputStream, OutputStream}
+import java.io._
 import java.nio.{ByteBuffer, MappedByteBuffer}
 
 import scala.collection.mutable.{ArrayBuffer, HashMap}
@@ -489,16 +489,17 @@ private[spark] class BlockManager(
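// ---------------------------------------------------------------------------
// Editor's illustrative sketch -- not part of the patch. The memory difference
// described in the commit message above, in isolation: serializing a whole
// partition into one ByteBuffer keeps every record on the heap at once, while
// streaming writes records as they are produced. The function parameters are
// placeholders, not Spark APIs.
import java.io.OutputStream
import java.nio.ByteBuffer

object WritePathSketch {
  // Old path: materialize all values, then hand the whole buffer to the store.
  def putBuffered(
      values: Iterator[Any],
      serializeAll: Iterator[Any] => ByteBuffer,
      putBytes: ByteBuffer => Unit): Unit = {
    putBytes(serializeAll(values))
  }

  // New path: serialize values directly onto the store's output stream, so only
  // a small write buffer is resident at any one time.
  def putStreamed(
      values: Iterator[Any],
      openStream: () => OutputStream,
      serializeStream: (OutputStream, Iterator[Any]) => Unit): Unit = {
    val os = openStream()
    try serializeStream(os, values) finally os.close()
  }
}
// ---------------------------------------------------------------------------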
if (level.useOffHeap) { logDebug(s"Getting block $blockId from ExternalBlockStore") if (externalBlockStore.contains(blockId)) { - externalBlockStore.getBytes(blockId) match { - case Some(bytes) => - if (!asBlockResult) { - return Some(bytes) - } else { - return Some(new BlockResult( - dataDeserialize(blockId, bytes), DataReadMethod.Memory, info.size)) - } + val result = if (asBlockResult) { + externalBlockStore.getValues(blockId) + .map(new BlockResult(_, DataReadMethod.Memory, info.size)) + } else { + externalBlockStore.getBytes(blockId) + } + result match { + case Some(values) => + return result case None => - logDebug(s"Block $blockId not found in externalBlockStore") + logDebug(s"Block $blockId not found in ExternalBlockStore") } } } @@ -1206,8 +1207,19 @@ private[spark] class BlockManager( bytes: ByteBuffer, serializer: Serializer = defaultSerializer): Iterator[Any] = { bytes.rewind() - val stream = wrapForCompression(blockId, new ByteBufferInputStream(bytes, true)) - serializer.newInstance().deserializeStream(stream).asIterator + dataDeserializeStream(blockId, new ByteBufferInputStream(bytes, true), serializer) + } + + /** + * Deserializes a InputStream into an iterator of values and disposes of it when the end of + * the iterator is reached. + */ + def dataDeserializeStream( + blockId: BlockId, + inputStream: InputStream, + serializer: Serializer = defaultSerializer): Iterator[Any] = { + val stream = new BufferedInputStream(inputStream) + serializer.newInstance().deserializeStream(wrapForCompression(blockId, stream)).asIterator } def stop(): Unit = { diff --git a/core/src/main/scala/org/apache/spark/storage/ExternalBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ExternalBlockManager.scala index 8964762df6af3..f39325a12d244 100644 --- a/core/src/main/scala/org/apache/spark/storage/ExternalBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/ExternalBlockManager.scala @@ -32,6 +32,8 @@ import java.nio.ByteBuffer */ private[spark] abstract class ExternalBlockManager { + protected var blockManager: BlockManager = _ + override def toString: String = {"External Block Store"} /** @@ -41,7 +43,9 @@ private[spark] abstract class ExternalBlockManager { * * @throws java.io.IOException if there is any file system failure during the initialization. */ - def init(blockManager: BlockManager, executorId: String): Unit + def init(blockManager: BlockManager, executorId: String): Unit = { + this.blockManager = blockManager + } /** * Drop the block from underlying external block store, if it exists.. @@ -73,6 +77,11 @@ private[spark] abstract class ExternalBlockManager { */ def putBytes(blockId: BlockId, bytes: ByteBuffer): Unit + def putValues(blockId: BlockId, values: Iterator[_]): Unit = { + val bytes = blockManager.dataSerialize(blockId, values) + putBytes(blockId, bytes) + } + /** * Retrieve the block bytes. * @return Some(ByteBuffer) if the block bytes is successfully retrieved @@ -82,6 +91,17 @@ private[spark] abstract class ExternalBlockManager { */ def getBytes(blockId: BlockId): Option[ByteBuffer] + /** + * Retrieve the block data. + * @return Some(Iterator[Any]) if the block data is successfully retrieved + * None if the block does not exist in the external block store. + * + * @throws java.io.IOException if there is any file system failure in getting the block. 
+ */ + def getValues(blockId: BlockId): Option[Iterator[_]] = { + getBytes(blockId).map(buffer => blockManager.dataDeserialize(blockId, buffer)) + } + /** * Get the size of the block saved in the underlying external block store, * which is saved before by putBytes. diff --git a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala index 0bf770306ae9b..291394ed34816 100644 --- a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala @@ -18,9 +18,11 @@ package org.apache.spark.storage import java.nio.ByteBuffer + +import scala.util.control.NonFatal + import org.apache.spark.Logging import org.apache.spark.util.Utils -import scala.util.control.NonFatal /** @@ -40,7 +42,7 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: externalBlockManager.map(_.getSize(blockId)).getOrElse(0) } catch { case NonFatal(t) => - logError(s"error in getSize from $blockId", t) + logError(s"Error in getSize($blockId)", t) 0L } } @@ -54,7 +56,7 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: values: Array[Any], level: StorageLevel, returnValues: Boolean): PutResult = { - putIterator(blockId, values.toIterator, level, returnValues) + putIntoExternalBlockStore(blockId, values.toIterator, returnValues) } override def putIterator( @@ -62,42 +64,70 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: values: Iterator[Any], level: StorageLevel, returnValues: Boolean): PutResult = { - logDebug(s"Attempting to write values for block $blockId") - val bytes = blockManager.dataSerialize(blockId, values) - putIntoExternalBlockStore(blockId, bytes, returnValues) + putIntoExternalBlockStore(blockId, values, returnValues) } private def putIntoExternalBlockStore( blockId: BlockId, - bytes: ByteBuffer, + values: Iterator[_], returnValues: Boolean): PutResult = { - // So that we do not modify the input offsets ! - // duplicate does not copy buffer, so inexpensive - val byteBuffer = bytes.duplicate() - byteBuffer.rewind() - logDebug(s"Attempting to put block $blockId into ExtBlk store") + logTrace(s"Attempting to put block $blockId into ExternalBlockStore") // we should never hit here if externalBlockManager is None. Handle it anyway for safety. 
try { val startTime = System.currentTimeMillis if (externalBlockManager.isDefined) { - externalBlockManager.get.putBytes(blockId, bytes) + externalBlockManager.get.putValues(blockId, values) + val size = getSize(blockId) + val data = if (returnValues) { + Left(getValues(blockId).get) + } else { + null + } val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file in ExternalBlockStore in %d ms".format( - blockId, Utils.bytesToString(byteBuffer.limit), finishTime - startTime)) + blockId, Utils.bytesToString(size), finishTime - startTime)) + PutResult(size, data) + } else { + logError(s"Error in putValues($blockId): no ExternalBlockManager has been configured") + PutResult(-1, null, Seq((blockId, BlockStatus.empty))) + } + } catch { + case NonFatal(t) => + logError(s"Error in putValues($blockId)", t) + PutResult(-1, null, Seq((blockId, BlockStatus.empty))) + } + } - if (returnValues) { - PutResult(bytes.limit(), Right(bytes.duplicate())) + private def putIntoExternalBlockStore( + blockId: BlockId, + bytes: ByteBuffer, + returnValues: Boolean): PutResult = { + logTrace(s"Attempting to put block $blockId into ExternalBlockStore") + // we should never hit here if externalBlockManager is None. Handle it anyway for safety. + try { + val startTime = System.currentTimeMillis + if (externalBlockManager.isDefined) { + val byteBuffer = bytes.duplicate() + byteBuffer.rewind() + externalBlockManager.get.putBytes(blockId, byteBuffer) + val size = bytes.limit() + val data = if (returnValues) { + Right(bytes) } else { - PutResult(bytes.limit(), null) + null } + val finishTime = System.currentTimeMillis + logDebug("Block %s stored as %s file in ExternalBlockStore in %d ms".format( + blockId, Utils.bytesToString(size), finishTime - startTime)) + PutResult(size, data) } else { - logError(s"error in putBytes $blockId") - PutResult(bytes.limit(), null, Seq((blockId, BlockStatus.empty))) + logError(s"Error in putBytes($blockId): no ExternalBlockManager has been configured") + PutResult(-1, null, Seq((blockId, BlockStatus.empty))) } } catch { case NonFatal(t) => - logError(s"error in putBytes $blockId", t) - PutResult(bytes.limit(), null, Seq((blockId, BlockStatus.empty))) + logError(s"Error in putBytes($blockId)", t) + PutResult(-1, null, Seq((blockId, BlockStatus.empty))) } } @@ -107,13 +137,19 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: externalBlockManager.map(_.removeBlock(blockId)).getOrElse(true) } catch { case NonFatal(t) => - logError(s"error in removing $blockId", t) + logError(s"Error in removeBlock($blockId)", t) true } } override def getValues(blockId: BlockId): Option[Iterator[Any]] = { - getBytes(blockId).map(buffer => blockManager.dataDeserialize(blockId, buffer)) + try { + externalBlockManager.flatMap(_.getValues(blockId)) + } catch { + case NonFatal(t) => + logError(s"Error in getValues($blockId)", t) + None + } } override def getBytes(blockId: BlockId): Option[ByteBuffer] = { @@ -121,7 +157,7 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: externalBlockManager.flatMap(_.getBytes(blockId)) } catch { case NonFatal(t) => - logError(s"error in getBytes from $blockId", t) + logError(s"Error in getBytes($blockId)", t) None } } @@ -130,13 +166,13 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: try { val ret = externalBlockManager.map(_.blockExists(blockId)).getOrElse(false) if (!ret) { - logInfo(s"remove block $blockId") + logInfo(s"Remove block $blockId") 
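// ---------------------------------------------------------------------------
// Editor's illustrative sketch -- not part of the patch. The
// scala.util.control.NonFatal pattern used throughout these changes matches
// ordinary exceptions but lets fatal errors (OutOfMemoryError,
// InterruptedException, ControlThrowable, ...) propagate instead of being
// swallowed by a log statement. `lookup` stands in for a store call.
import scala.util.control.NonFatal

object NonFatalSketch {
  def sizeOrZero(lookup: () => Long): Long =
    try lookup() catch {
      case NonFatal(t) =>
        // The patch calls logError(s"Error in getSize($blockId)", t) here.
        println(s"lookup failed: ${t.getMessage}")
        0L
    }
}
// ---------------------------------------------------------------------------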
blockManager.removeBlock(blockId, true) } ret } catch { case NonFatal(t) => - logError(s"error in getBytes from $blockId", t) + logError(s"Error in getBytes($blockId)", t) false } } diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala index bdc6276e41915..fb4ba0eac9d9a 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala @@ -22,7 +22,10 @@ import java.nio.ByteBuffer import java.text.SimpleDateFormat import java.util.{Date, Random} +import scala.util.control.NonFatal + import com.google.common.io.ByteStreams + import tachyon.client.{ReadType, WriteType, TachyonFS, TachyonFile} import tachyon.TachyonURI @@ -38,7 +41,6 @@ import org.apache.spark.util.Utils */ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Logging { - var blockManager: BlockManager =_ var rootDirs: String = _ var master: String = _ var client: tachyon.client.TachyonFS = _ @@ -52,7 +54,7 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log override def init(blockManager: BlockManager, executorId: String): Unit = { - this.blockManager = blockManager + super.init(blockManager, executorId) val storeDir = blockManager.conf.get(ExternalBlockStore.BASE_DIR, "/tmp_spark_tachyon") val appFolderName = blockManager.conf.get(ExternalBlockStore.FOLD_NAME) @@ -95,8 +97,29 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log override def putBytes(blockId: BlockId, bytes: ByteBuffer): Unit = { val file = getFile(blockId) val os = file.getOutStream(WriteType.TRY_CACHE) - os.write(bytes.array()) - os.close() + try { + os.write(bytes.array()) + } catch { + case NonFatal(e) => + logWarning(s"Failed to put bytes of block $blockId into Tachyon", e) + os.cancel() + } finally { + os.close() + } + } + + override def putValues(blockId: BlockId, values: Iterator[_]): Unit = { + val file = getFile(blockId) + val os = file.getOutStream(WriteType.TRY_CACHE) + try { + blockManager.dataSerializeStream(blockId, os, values) + } catch { + case NonFatal(e) => + logWarning(s"Failed to put values of block $blockId into Tachyon", e) + os.cancel() + } finally { + os.close() + } } override def getBytes(blockId: BlockId): Option[ByteBuffer] = { @@ -105,21 +128,31 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log return None } val is = file.getInStream(ReadType.CACHE) - assert (is != null) try { val size = file.length val bs = new Array[Byte](size.asInstanceOf[Int]) ByteStreams.readFully(is, bs) Some(ByteBuffer.wrap(bs)) } catch { - case ioe: IOException => - logWarning(s"Failed to fetch the block $blockId from Tachyon", ioe) + case NonFatal(e) => + logWarning(s"Failed to get bytes of block $blockId from Tachyon", e) None } finally { is.close() } } + override def getValues(blockId: BlockId): Option[Iterator[_]] = { + val file = getFile(blockId) + if (file == null || file.getLocationHosts().size() == 0) { + return None + } + val is = file.getInStream(ReadType.CACHE) + Option(is).map { is => + blockManager.dataDeserializeStream(blockId, is) + } + } + override def getSize(blockId: BlockId): Long = { getFile(blockId.name).length } @@ -184,7 +217,7 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log tachyonDir = client.getFile(path) } } catch { - case e: Exception => + case NonFatal(e) => logWarning("Attempt " 
+ tries + " to create tachyon dir " + tachyonDir + " failed", e) } } @@ -206,7 +239,7 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log Utils.deleteRecursively(tachyonDir, client) } } catch { - case e: Exception => + case NonFatal(e) => logError("Exception while deleting tachyon spark dir: " + tachyonDir, e) } } From 8ddcb25b3990ec691463f87d4071e7425f4909a9 Mon Sep 17 00:00:00 2001 From: Davies Liu <davies@databricks.com> Date: Wed, 20 May 2015 23:05:54 -0700 Subject: [PATCH 292/320] [SPARK-7606] [SQL] [PySpark] add version to Python SQL API docs Add version info for public Python SQL API. cc rxin Author: Davies Liu <davies@databricks.com> Closes #6295 from davies/versions and squashes the following commits: cfd91e6 [Davies Liu] add more version for DataFrame API 600834d [Davies Liu] add version to SQL API docs --- python/pyspark/sql/__init__.py | 7 ++++ python/pyspark/sql/column.py | 12 ++++++ python/pyspark/sql/context.py | 29 ++++++++++++-- python/pyspark/sql/dataframe.py | 68 +++++++++++++++++++++++++++++++- python/pyspark/sql/functions.py | 47 +++++++++++++++------- python/pyspark/sql/group.py | 10 +++++ python/pyspark/sql/readwriter.py | 15 +++++++ 7 files changed, 170 insertions(+), 18 deletions(-) diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py index 634c575ecd80e..66b0bff2908b7 100644 --- a/python/pyspark/sql/__init__.py +++ b/python/pyspark/sql/__init__.py @@ -41,6 +41,13 @@ """ from __future__ import absolute_import + +def since(version): + def deco(f): + f.__doc__ = f.__doc__.rstrip() + "\n\n.. versionadded:: %s" % version + return f + return deco + # fix the module name conflict for Python 3+ import sys from . import _types as types diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index fc7ad674daa5b..d03bb6d33dd03 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -23,6 +23,7 @@ from pyspark.context import SparkContext from pyspark.rdd import ignore_unicode_prefix +from pyspark.sql import since from pyspark.sql.types import * __all__ = ["DataFrame", "Column", "SchemaRDD", "DataFrameNaFunctions", @@ -114,6 +115,8 @@ class Column(object): # 2. Create from an expression df.colName + 1 1 / df.colName + + .. versionadded:: 1.3 """ def __init__(self, jc): @@ -159,6 +162,7 @@ def __init__(self, jc): bitwiseAND = _bin_op("bitwiseAND") bitwiseXOR = _bin_op("bitwiseXOR") + @since(1.3) def getItem(self, key): """An expression that gets an item at position `ordinal` out of a list, or gets an item by key out of a dict. @@ -179,6 +183,7 @@ def getItem(self, key): """ return self[key] + @since(1.3) def getField(self, name): """An expression that gets a field by name in a StructField. @@ -211,6 +216,7 @@ def __getattr__(self, item): endswith = _bin_op("endsWith") @ignore_unicode_prefix + @since(1.3) def substr(self, startPos, length): """ Return a :class:`Column` which is a substring of the column @@ -234,6 +240,7 @@ def substr(self, startPos, length): __getslice__ = substr @ignore_unicode_prefix + @since(1.3) def inSet(self, *cols): """ A boolean expression that is evaluated to true if the value of this expression is contained by the evaluated values of the arguments. 
@@ -259,6 +266,7 @@ def inSet(self, *cols): isNull = _unary_op("isNull", "True if the current expression is null.") isNotNull = _unary_op("isNotNull", "True if the current expression is not null.") + @since(1.3) def alias(self, *alias): """Returns this column aliased with a new name or names (in the case of expressions that return more than one column, such as explode). @@ -274,6 +282,7 @@ def alias(self, *alias): return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias)))) @ignore_unicode_prefix + @since(1.3) def cast(self, dataType): """ Convert the column into type `dataType` @@ -294,6 +303,7 @@ def cast(self, dataType): return Column(jc) @ignore_unicode_prefix + @since(1.3) def between(self, lowerBound, upperBound): """ A boolean expression that is evaluated to true if the value of this expression is between the given columns. @@ -301,6 +311,7 @@ def between(self, lowerBound, upperBound): return (self >= lowerBound) & (self <= upperBound) @ignore_unicode_prefix + @since(1.4) def when(self, condition, value): """Evaluates a list of conditions and returns one of multiple possible result expressions. If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. @@ -319,6 +330,7 @@ def when(self, condition, value): return Column(jc) @ignore_unicode_prefix + @since(1.4) def otherwise(self, value): """Evaluates a list of conditions and returns one of multiple possible result expressions. If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 7543475014bd2..51f12c5bb4198 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -28,6 +28,7 @@ from pyspark.rdd import RDD, _prepare_for_python_RDD, ignore_unicode_prefix from pyspark.serializers import AutoBatchedSerializer, PickleSerializer +from pyspark.sql import since from pyspark.sql.types import Row, StringType, StructType, _verify_type, \ _infer_schema, _has_nulltype, _merge_type, _create_converter, _python_to_sql_converter from pyspark.sql.dataframe import DataFrame @@ -106,11 +107,13 @@ def _ssql_ctx(self): self._scala_SQLContext = self._jvm.SQLContext(self._jsc.sc()) return self._scala_SQLContext + @since(1.3) def setConf(self, key, value): """Sets the given Spark SQL configuration property. """ self._ssql_ctx.setConf(key, value) + @since(1.3) def getConf(self, key, defaultValue): """Returns the value of Spark SQL configuration property for the given key. @@ -119,10 +122,12 @@ def getConf(self, key, defaultValue): return self._ssql_ctx.getConf(key, defaultValue) @property + @since("1.3.1") def udf(self): """Returns a :class:`UDFRegistration` for UDF registration.""" return UDFRegistration(self) + @since(1.4) def range(self, start, end, step=1, numPartitions=None): """ Create a :class:`DataFrame` with single LongType column named `id`, @@ -144,6 +149,7 @@ def range(self, start, end, step=1, numPartitions=None): return DataFrame(jdf, self) @ignore_unicode_prefix + @since(1.2) def registerFunction(self, name, f, returnType=StringType()): """Registers a lambda function as a UDF so it can be used in SQL statements. @@ -210,7 +216,8 @@ def _inferSchema(self, rdd, samplingRatio=None): @ignore_unicode_prefix def inferSchema(self, rdd, samplingRatio=None): - """::note: Deprecated in 1.3, use :func:`createDataFrame` instead. + """ + .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead. 
""" warnings.warn("inferSchema is deprecated, please use createDataFrame instead") @@ -221,7 +228,8 @@ def inferSchema(self, rdd, samplingRatio=None): @ignore_unicode_prefix def applySchema(self, rdd, schema): - """::note: Deprecated in 1.3, use :func:`createDataFrame` instead. + """ + .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead. """ warnings.warn("applySchema is deprecated, please use createDataFrame instead") @@ -233,6 +241,7 @@ def applySchema(self, rdd, schema): return self.createDataFrame(rdd, schema) + @since(1.3) @ignore_unicode_prefix def createDataFrame(self, data, schema=None, samplingRatio=None): """ @@ -337,6 +346,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None): df = self._ssql_ctx.applySchemaToPythonRDD(jrdd.rdd(), schema.json()) return DataFrame(df, self) + @since(1.3) def registerDataFrameAsTable(self, df, tableName): """Registers the given :class:`DataFrame` as a temporary table in the catalog. @@ -349,6 +359,7 @@ def registerDataFrameAsTable(self, df, tableName): else: raise ValueError("Can only register DataFrame as table") + @since(1.0) def parquetFile(self, *paths): """Loads a Parquet file, returning the result as a :class:`DataFrame`. @@ -367,6 +378,7 @@ def parquetFile(self, *paths): jdf = self._ssql_ctx.parquetFile(jpaths) return DataFrame(jdf, self) + @since(1.0) def jsonFile(self, path, schema=None, samplingRatio=1.0): """Loads a text file storing one JSON object per line as a :class:`DataFrame`. @@ -407,6 +419,7 @@ def jsonFile(self, path, schema=None, samplingRatio=1.0): return DataFrame(df, self) @ignore_unicode_prefix + @since(1.0) def jsonRDD(self, rdd, schema=None, samplingRatio=1.0): """Loads an RDD storing one JSON object per string as a :class:`DataFrame`. @@ -449,6 +462,7 @@ def func(iterator): df = self._ssql_ctx.jsonRDD(jrdd.rdd(), scala_datatype) return DataFrame(df, self) + @since(1.3) def load(self, path=None, source=None, schema=None, **options): """Returns the dataset in a data source as a :class:`DataFrame`. @@ -460,6 +474,7 @@ def load(self, path=None, source=None, schema=None, **options): """ return self.read.load(path, source, schema, **options) + @since(1.3) def createExternalTable(self, tableName, path=None, source=None, schema=None, **options): """Creates an external table based on the dataset in a data source. @@ -489,6 +504,7 @@ def createExternalTable(self, tableName, path=None, source=None, return DataFrame(df, self) @ignore_unicode_prefix + @since(1.0) def sql(self, sqlQuery): """Returns a :class:`DataFrame` representing the result of the given query. @@ -499,6 +515,7 @@ def sql(self, sqlQuery): """ return DataFrame(self._ssql_ctx.sql(sqlQuery), self) + @since(1.0) def table(self, tableName): """Returns the specified table as a :class:`DataFrame`. @@ -510,6 +527,7 @@ def table(self, tableName): return DataFrame(self._ssql_ctx.table(tableName), self) @ignore_unicode_prefix + @since(1.3) def tables(self, dbName=None): """Returns a :class:`DataFrame` containing names of tables in the given database. @@ -528,6 +546,7 @@ def tables(self, dbName=None): else: return DataFrame(self._ssql_ctx.tables(dbName), self) + @since(1.3) def tableNames(self, dbName=None): """Returns a list of names of tables in the database ``dbName``. 
@@ -544,25 +563,29 @@ def tableNames(self, dbName=None): else: return [name for name in self._ssql_ctx.tableNames(dbName)] + @since(1.0) def cacheTable(self, tableName): """Caches the specified table in-memory.""" self._ssql_ctx.cacheTable(tableName) + @since(1.0) def uncacheTable(self, tableName): """Removes the specified table from the in-memory cache.""" self._ssql_ctx.uncacheTable(tableName) + @since(1.3) def clearCache(self): """Removes all cached tables from the in-memory cache. """ self._ssql_ctx.clearCache() @property + @since(1.4) def read(self): """ Returns a :class:`DataFrameReader` that can be used to read data in as a :class:`DataFrame`. - ::note: Experimental + .. note:: Experimental >>> sqlContext.read <pyspark.sql.readwriter.DataFrameReader object at ...> diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index f2280b5100e53..3fc7d0048edf6 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -29,6 +29,7 @@ from pyspark.serializers import BatchedSerializer, PickleSerializer, UTF8Deserializer from pyspark.storagelevel import StorageLevel from pyspark.traceback_utils import SCCallSiteSync +from pyspark.sql import since from pyspark.sql.types import _create_cls, _parse_datatype_json_string from pyspark.sql.column import Column, _to_seq, _to_java_column from pyspark.sql.readwriter import DataFrameWriter @@ -60,6 +61,8 @@ class DataFrame(object): people.filter(people.age > 30).join(department, people.deptId == department.id)) \ .groupBy(department.name, "gender").agg({"salary": "avg", "age": "max"}) + + .. versionadded:: 1.3 """ def __init__(self, jdf, sql_ctx): @@ -71,6 +74,7 @@ def __init__(self, jdf, sql_ctx): self._lazy_rdd = None @property + @since(1.3) def rdd(self): """Returns the content as an :class:`pyspark.RDD` of :class:`Row`. """ @@ -88,18 +92,21 @@ def applySchema(it): return self._lazy_rdd @property + @since("1.3.1") def na(self): """Returns a :class:`DataFrameNaFunctions` for handling missing values. """ return DataFrameNaFunctions(self) @property + @since(1.4) def stat(self): """Returns a :class:`DataFrameStatFunctions` for statistic functions. """ return DataFrameStatFunctions(self) @ignore_unicode_prefix + @since(1.3) def toJSON(self, use_unicode=True): """Converts a :class:`DataFrame` into a :class:`RDD` of string. @@ -111,6 +118,7 @@ def toJSON(self, use_unicode=True): rdd = self._jdf.toJSON() return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode)) + @since(1.3) def saveAsParquetFile(self, path): """Saves the contents as a Parquet file, preserving the schema. @@ -127,6 +135,7 @@ def saveAsParquetFile(self, path): """ self._jdf.saveAsParquetFile(path) + @since(1.3) def registerTempTable(self, name): """Registers this RDD as a temporary table using the given name. @@ -140,11 +149,13 @@ def registerTempTable(self, name): """ self._jdf.registerTempTable(name) + @since(1.3) def registerAsTable(self, name): """DEPRECATED: use :func:`registerTempTable` instead""" warnings.warn("Use registerTempTable instead of registerAsTable.", DeprecationWarning) self.registerTempTable(name) + @since(1.3) def insertInto(self, tableName, overwrite=False): """Inserts the contents of this :class:`DataFrame` into the specified table. @@ -152,6 +163,7 @@ def insertInto(self, tableName, overwrite=False): """ self._jdf.insertInto(tableName, overwrite) + @since(1.3) def saveAsTable(self, tableName, source=None, mode="error", **options): """Saves the contents of this :class:`DataFrame` to a data source as a table. 
@@ -169,6 +181,7 @@ def saveAsTable(self, tableName, source=None, mode="error", **options): """ self.write.saveAsTable(tableName, source, mode, **options) + @since(1.3) def save(self, path=None, source=None, mode="error", **options): """Saves the contents of the :class:`DataFrame` to a data source. @@ -187,6 +200,7 @@ def save(self, path=None, source=None, mode="error", **options): return self.write.save(path, source, mode, **options) @property + @since(1.4) def write(self): """ Interface for saving the content of the :class:`DataFrame` out @@ -194,7 +208,7 @@ def write(self): :return :class:`DataFrameWriter` - ::note: Experimental + .. note:: Experimental >>> df.write <pyspark.sql.readwriter.DataFrameWriter object at ...> @@ -202,6 +216,7 @@ def write(self): return DataFrameWriter(self) @property + @since(1.3) def schema(self): """Returns the schema of this :class:`DataFrame` as a :class:`types.StructType`. @@ -212,6 +227,7 @@ def schema(self): self._schema = _parse_datatype_json_string(self._jdf.schema().json()) return self._schema + @since(1.3) def printSchema(self): """Prints out the schema in the tree format. @@ -223,6 +239,7 @@ def printSchema(self): """ print(self._jdf.schema().treeString()) + @since(1.3) def explain(self, extended=False): """Prints the (logical and physical) plans to the console for debugging purpose. @@ -248,12 +265,14 @@ def explain(self, extended=False): else: print(self._jdf.queryExecution().executedPlan().toString()) + @since(1.3) def isLocal(self): """Returns ``True`` if the :func:`collect` and :func:`take` methods can be run locally (without any Spark executors). """ return self._jdf.isLocal() + @since(1.3) def show(self, n=20): """Prints the first ``n`` rows to the console. @@ -272,6 +291,7 @@ def show(self, n=20): def __repr__(self): return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes)) + @since(1.3) def count(self): """Returns the number of rows in this :class:`DataFrame`. @@ -281,6 +301,7 @@ def count(self): return int(self._jdf.count()) @ignore_unicode_prefix + @since(1.3) def collect(self): """Returns all the records as a list of :class:`Row`. @@ -294,6 +315,7 @@ def collect(self): return [cls(r) for r in rs] @ignore_unicode_prefix + @since(1.3) def limit(self, num): """Limits the result count to the number specified. @@ -306,6 +328,7 @@ def limit(self, num): return DataFrame(jdf, self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def take(self, num): """Returns the first ``num`` rows as a :class:`list` of :class:`Row`. @@ -315,6 +338,7 @@ def take(self, num): return self.limit(num).collect() @ignore_unicode_prefix + @since(1.3) def map(self, f): """ Returns a new :class:`RDD` by applying a the ``f`` function to each :class:`Row`. @@ -326,6 +350,7 @@ def map(self, f): return self.rdd.map(f) @ignore_unicode_prefix + @since(1.3) def flatMap(self, f): """ Returns a new :class:`RDD` by first applying the ``f`` function to each :class:`Row`, and then flattening the results. @@ -337,6 +362,7 @@ def flatMap(self, f): """ return self.rdd.flatMap(f) + @since(1.3) def mapPartitions(self, f, preservesPartitioning=False): """Returns a new :class:`RDD` by applying the ``f`` function to each partition. @@ -349,6 +375,7 @@ def mapPartitions(self, f, preservesPartitioning=False): """ return self.rdd.mapPartitions(f, preservesPartitioning) + @since(1.3) def foreach(self, f): """Applies the ``f`` function to all :class:`Row` of this :class:`DataFrame`. 
@@ -360,6 +387,7 @@ def foreach(self, f): """ return self.rdd.foreach(f) + @since(1.3) def foreachPartition(self, f): """Applies the ``f`` function to each partition of this :class:`DataFrame`. @@ -372,6 +400,7 @@ def foreachPartition(self, f): """ return self.rdd.foreachPartition(f) + @since(1.3) def cache(self): """ Persists with the default storage level (C{MEMORY_ONLY_SER}). """ @@ -379,6 +408,7 @@ def cache(self): self._jdf.cache() return self + @since(1.3) def persist(self, storageLevel=StorageLevel.MEMORY_ONLY_SER): """Sets the storage level to persist its values across operations after the first time it is computed. This can only be used to assign @@ -390,6 +420,7 @@ def persist(self, storageLevel=StorageLevel.MEMORY_ONLY_SER): self._jdf.persist(javaStorageLevel) return self + @since(1.3) def unpersist(self, blocking=True): """Marks the :class:`DataFrame` as non-persistent, and remove all blocks for it from memory and disk. @@ -398,6 +429,7 @@ def unpersist(self, blocking=True): self._jdf.unpersist(blocking) return self + @since(1.4) def coalesce(self, numPartitions): """ Returns a new :class:`DataFrame` that has exactly `numPartitions` partitions. @@ -412,6 +444,7 @@ def coalesce(self, numPartitions): """ return DataFrame(self._jdf.coalesce(numPartitions), self.sql_ctx) + @since(1.3) def repartition(self, numPartitions): """Returns a new :class:`DataFrame` that has exactly ``numPartitions`` partitions. @@ -420,6 +453,7 @@ def repartition(self, numPartitions): """ return DataFrame(self._jdf.repartition(numPartitions), self.sql_ctx) + @since(1.3) def distinct(self): """Returns a new :class:`DataFrame` containing the distinct rows in this :class:`DataFrame`. @@ -428,6 +462,7 @@ def distinct(self): """ return DataFrame(self._jdf.distinct(), self.sql_ctx) + @since(1.3) def sample(self, withReplacement, fraction, seed=None): """Returns a sampled subset of this :class:`DataFrame`. @@ -439,6 +474,7 @@ def sample(self, withReplacement, fraction, seed=None): rdd = self._jdf.sample(withReplacement, fraction, long(seed)) return DataFrame(rdd, self.sql_ctx) + @since(1.4) def randomSplit(self, weights, seed=None): """Randomly splits this :class:`DataFrame` with the provided weights. @@ -461,6 +497,7 @@ def randomSplit(self, weights, seed=None): return [DataFrame(rdd, self.sql_ctx) for rdd in rdd_array] @property + @since(1.3) def dtypes(self): """Returns all column names and their data types as a list. @@ -471,6 +508,7 @@ def dtypes(self): @property @ignore_unicode_prefix + @since(1.3) def columns(self): """Returns all column names as a list. @@ -480,6 +518,7 @@ def columns(self): return [f.name for f in self.schema.fields] @ignore_unicode_prefix + @since(1.3) def alias(self, alias): """Returns a new :class:`DataFrame` with an alias set. @@ -494,6 +533,7 @@ def alias(self, alias): return DataFrame(getattr(self._jdf, "as")(alias), self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def join(self, other, joinExprs=None, joinType=None): """Joins with another :class:`DataFrame`, using the given join expression. @@ -527,6 +567,7 @@ def join(self, other, joinExprs=None, joinType=None): return DataFrame(jdf, self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def sort(self, *cols, **kwargs): """Returns a new :class:`DataFrame` sorted by the specified column(s). @@ -586,6 +627,7 @@ def _jcols(self, *cols): cols = cols[0] return self._jseq(cols, _to_java_column) + @since("1.3.1") def describe(self, *cols): """Computes statistics for numeric columns. 
@@ -607,6 +649,7 @@ def describe(self, *cols): return DataFrame(jdf, self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def head(self, n=None): """ Returns the first ``n`` rows as a list of :class:`Row`, @@ -623,6 +666,7 @@ def head(self, n=None): return self.take(n) @ignore_unicode_prefix + @since(1.3) def first(self): """Returns the first row as a :class:`Row`. @@ -632,6 +676,7 @@ def first(self): return self.head() @ignore_unicode_prefix + @since(1.3) def __getitem__(self, item): """Returns the column as a :class:`Column`. @@ -659,6 +704,7 @@ def __getitem__(self, item): else: raise TypeError("unexpected item type: %s" % type(item)) + @since(1.3) def __getattr__(self, name): """Returns the :class:`Column` denoted by ``name``. @@ -672,6 +718,7 @@ def __getattr__(self, name): return Column(jc) @ignore_unicode_prefix + @since(1.3) def select(self, *cols): """Projects a set of expressions and returns a new :class:`DataFrame`. @@ -689,6 +736,7 @@ def select(self, *cols): jdf = self._jdf.select(self._jcols(*cols)) return DataFrame(jdf, self.sql_ctx) + @since(1.3) def selectExpr(self, *expr): """Projects a set of SQL expressions and returns a new :class:`DataFrame`. @@ -703,6 +751,7 @@ def selectExpr(self, *expr): return DataFrame(jdf, self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def filter(self, condition): """Filters rows using the given condition. @@ -732,6 +781,7 @@ def filter(self, condition): where = filter @ignore_unicode_prefix + @since(1.3) def groupBy(self, *cols): """Groups the :class:`DataFrame` using the specified columns, so we can run aggregation on them. See :class:`GroupedData` @@ -755,6 +805,7 @@ def groupBy(self, *cols): from pyspark.sql.group import GroupedData return GroupedData(jdf, self.sql_ctx) + @since(1.3) def agg(self, *exprs): """ Aggregate on the entire :class:`DataFrame` without groups (shorthand for ``df.groupBy.agg()``). @@ -767,6 +818,7 @@ def agg(self, *exprs): """ return self.groupBy().agg(*exprs) + @since(1.3) def unionAll(self, other): """ Return a new :class:`DataFrame` containing union of rows in this frame and another frame. @@ -775,6 +827,7 @@ def unionAll(self, other): """ return DataFrame(self._jdf.unionAll(other._jdf), self.sql_ctx) + @since(1.3) def intersect(self, other): """ Return a new :class:`DataFrame` containing rows only in both this frame and another frame. @@ -783,6 +836,7 @@ def intersect(self, other): """ return DataFrame(self._jdf.intersect(other._jdf), self.sql_ctx) + @since(1.3) def subtract(self, other): """ Return a new :class:`DataFrame` containing rows in this frame but not in another frame. @@ -791,6 +845,7 @@ def subtract(self, other): """ return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sql_ctx) + @since(1.4) def dropDuplicates(self, subset=None): """Return a new :class:`DataFrame` with duplicate rows removed, optionally only considering certain columns. @@ -821,6 +876,7 @@ def dropDuplicates(self, subset=None): jdf = self._jdf.dropDuplicates(self._jseq(subset)) return DataFrame(jdf, self.sql_ctx) + @since("1.3.1") def dropna(self, how='any', thresh=None, subset=None): """Returns a new :class:`DataFrame` omitting rows with null values. @@ -863,6 +919,7 @@ def dropna(self, how='any', thresh=None, subset=None): return DataFrame(self._jdf.na().drop(thresh, self._jseq(subset)), self.sql_ctx) + @since("1.3.1") def fillna(self, value, subset=None): """Replace null values, alias for ``na.fill()``. 
@@ -924,6 +981,7 @@ def fillna(self, value, subset=None): return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sql_ctx) + @since(1.4) def replace(self, to_replace, value, subset=None): """Returns a new :class:`DataFrame` replacing a value with another value. @@ -999,6 +1057,7 @@ def replace(self, to_replace, value, subset=None): return DataFrame( self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), self.sql_ctx) + @since(1.4) def corr(self, col1, col2, method=None): """ Calculates the correlation of two columns of a DataFrame as a double value. Currently only @@ -1020,6 +1079,7 @@ def corr(self, col1, col2, method=None): "coefficient is supported.") return self._jdf.stat().corr(col1, col2, method) + @since(1.4) def cov(self, col1, col2): """ Calculate the sample covariance for the given columns, specified by their names, as a @@ -1034,6 +1094,7 @@ def cov(self, col1, col2): raise ValueError("col2 should be a string.") return self._jdf.stat().cov(col1, col2) + @since(1.4) def crosstab(self, col1, col2): """ Computes a pair-wise frequency table of the given columns. Also known as a contingency @@ -1055,6 +1116,7 @@ def crosstab(self, col1, col2): raise ValueError("col2 should be a string.") return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sql_ctx) + @since(1.4) def freqItems(self, cols, support=None): """ Finding frequent items for columns, possibly with false positives. Using the @@ -1076,6 +1138,7 @@ def freqItems(self, cols, support=None): return DataFrame(self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def withColumn(self, colName, col): """Returns a new :class:`DataFrame` by adding a column. @@ -1088,6 +1151,7 @@ def withColumn(self, colName, col): return self.select('*', col.alias(colName)) @ignore_unicode_prefix + @since(1.3) def withColumnRenamed(self, existing, new): """Returns a new :class:`DataFrame` by renaming an existing column. @@ -1102,6 +1166,7 @@ def withColumnRenamed(self, existing, new): for c in self.columns] return self.select(*cols) + @since(1.4) @ignore_unicode_prefix def drop(self, colName): """Returns a new :class:`DataFrame` that drops the specified column. @@ -1114,6 +1179,7 @@ def drop(self, colName): jdf = self._jdf.drop(colName) return DataFrame(jdf, self.sql_ctx) + @since(1.3) def toPandas(self): """Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. 
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index fbe9bf5b526af..9b0d7f3e6656e 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -26,6 +26,7 @@ from pyspark import SparkContext from pyspark.rdd import _prepare_for_python_RDD, ignore_unicode_prefix from pyspark.serializers import PickleSerializer, AutoBatchedSerializer +from pyspark.sql import since from pyspark.sql.types import StringType from pyspark.sql.column import Column, _to_java_column, _to_seq @@ -78,6 +79,18 @@ def _(col1, col2): 'sqrt': 'Computes the square root of the specified float value.', 'abs': 'Computes the absolute value.', + 'max': 'Aggregate function: returns the maximum value of the expression in a group.', + 'min': 'Aggregate function: returns the minimum value of the expression in a group.', + 'first': 'Aggregate function: returns the first value in a group.', + 'last': 'Aggregate function: returns the last value in a group.', + 'count': 'Aggregate function: returns the number of items in a group.', + 'sum': 'Aggregate function: returns the sum of all values in the expression.', + 'avg': 'Aggregate function: returns the average of the values in a group.', + 'mean': 'Aggregate function: returns the average of the values in a group.', + 'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.', +} + +_functions_1_4 = { # unary math functions 'acos': 'Computes the cosine inverse of the given value; the returned angle is in the range' + '0.0 through pi.', @@ -102,21 +115,11 @@ def _(col1, col2): 'tan': 'Computes the tangent of the given value.', 'tanh': 'Computes the hyperbolic tangent of the given value.', 'toDegrees': 'Converts an angle measured in radians to an approximately equivalent angle ' + - 'measured in degrees.', + 'measured in degrees.', 'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' + - 'measured in radians.', + 'measured in radians.', 'bitwiseNOT': 'Computes bitwise not.', - - 'max': 'Aggregate function: returns the maximum value of the expression in a group.', - 'min': 'Aggregate function: returns the minimum value of the expression in a group.', - 'first': 'Aggregate function: returns the first value in a group.', - 'last': 'Aggregate function: returns the last value in a group.', - 'count': 'Aggregate function: returns the number of items in a group.', - 'sum': 'Aggregate function: returns the sum of all values in the expression.', - 'avg': 'Aggregate function: returns the average of the values in a group.', - 'mean': 'Aggregate function: returns the average of the values in a group.', - 'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.', } # math functions that take two arguments as input @@ -128,15 +131,18 @@ def _(col1, col2): } for _name, _doc in _functions.items(): - globals()[_name] = _create_function(_name, _doc) + globals()[_name] = since(1.3)(_create_function(_name, _doc)) +for _name, _doc in _functions_1_4.items(): + globals()[_name] = since(1.4)(_create_function(_name, _doc)) for _name, _doc in _binary_mathfunctions.items(): - globals()[_name] = _create_binary_mathfunction(_name, _doc) + globals()[_name] = since(1.4)(_create_binary_mathfunction(_name, _doc)) del _name, _doc __all__ += _functions.keys() __all__ += _binary_mathfunctions.keys() __all__.sort() +@since(1.4) def array(*cols): """Creates a new array column. 
@@ -155,6 +161,7 @@ def array(*cols): return Column(jc) +@since(1.3) def approxCountDistinct(col, rsd=None): """Returns a new :class:`Column` for approximate distinct count of ``col``. @@ -169,6 +176,7 @@ def approxCountDistinct(col, rsd=None): return Column(jc) +@since(1.4) def explode(col): """Returns a new row for each element in the given array or map. @@ -189,6 +197,7 @@ def explode(col): return Column(jc) +@since(1.4) def coalesce(*cols): """Returns the first column that is not null. @@ -225,6 +234,7 @@ def coalesce(*cols): return Column(jc) +@since(1.3) def countDistinct(col, *cols): """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``. @@ -239,6 +249,7 @@ def countDistinct(col, *cols): return Column(jc) +@since(1.4) def monotonicallyIncreasingId(): """A column that generates monotonically increasing 64-bit integers. @@ -259,6 +270,7 @@ def monotonicallyIncreasingId(): return Column(sc._jvm.functions.monotonicallyIncreasingId()) +@since(1.4) def rand(seed=None): """Generates a random column with i.i.d. samples from U[0.0, 1.0]. """ @@ -270,6 +282,7 @@ def rand(seed=None): return Column(jc) +@since(1.4) def randn(seed=None): """Generates a column with i.i.d. samples from the standard normal distribution. """ @@ -281,6 +294,7 @@ def randn(seed=None): return Column(jc) +@since(1.4) def sparkPartitionId(): """A column for partition ID of the Spark task. @@ -294,6 +308,7 @@ def sparkPartitionId(): @ignore_unicode_prefix +@since(1.4) def struct(*cols): """Creates a new struct column. @@ -312,6 +327,7 @@ def struct(*cols): return Column(jc) +@since(1.4) def when(condition, value): """Evaluates a list of conditions and returns one of multiple possible result expressions. If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. @@ -336,6 +352,8 @@ def when(condition, value): class UserDefinedFunction(object): """ User defined function in Python + + .. versionadded:: 1.3 """ def __init__(self, func, returnType): self.func = func @@ -369,6 +387,7 @@ def __call__(self, *cols): return Column(jc) +@since(1.3) def udf(f, returnType=StringType()): """Creates a :class:`Column` expression representing a user defined function (UDF). diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py index 9f7c743c051d3..4da472a577eae 100644 --- a/python/pyspark/sql/group.py +++ b/python/pyspark/sql/group.py @@ -16,6 +16,7 @@ # from pyspark.rdd import ignore_unicode_prefix +from pyspark.sql import since from pyspark.sql.column import Column, _to_seq from pyspark.sql.dataframe import DataFrame from pyspark.sql.types import * @@ -47,6 +48,8 @@ class GroupedData(object): """ A set of methods for aggregations on a :class:`DataFrame`, created by :func:`DataFrame.groupBy`. + + .. versionadded:: 1.3 """ def __init__(self, jdf, sql_ctx): @@ -54,6 +57,7 @@ def __init__(self, jdf, sql_ctx): self.sql_ctx = sql_ctx @ignore_unicode_prefix + @since(1.3) def agg(self, *exprs): """Compute aggregates and returns the result as a :class:`DataFrame`. @@ -86,6 +90,7 @@ def agg(self, *exprs): return DataFrame(jdf, self.sql_ctx) @dfapi + @since(1.3) def count(self): """Counts the number of records for each group. @@ -94,6 +99,7 @@ def count(self): """ @df_varargs_api + @since(1.3) def mean(self, *cols): """Computes average values for each numeric columns for each group. @@ -108,6 +114,7 @@ def mean(self, *cols): """ @df_varargs_api + @since(1.3) def avg(self, *cols): """Computes average values for each numeric columns for each group. 
@@ -122,6 +129,7 @@ def avg(self, *cols): """ @df_varargs_api + @since(1.3) def max(self, *cols): """Computes the max value for each numeric columns for each group. @@ -132,6 +140,7 @@ def max(self, *cols): """ @df_varargs_api + @since(1.3) def min(self, *cols): """Computes the min value for each numeric column for each group. @@ -144,6 +153,7 @@ def min(self, *cols): """ @df_varargs_api + @since(1.3) def sum(self, *cols): """Compute the sum for each numeric columns for each group. diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index e2b27fb587e73..02b3aab2b12e4 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -17,6 +17,7 @@ from py4j.java_gateway import JavaClass +from pyspark.sql import since from pyspark.sql.column import _to_seq from pyspark.sql.types import * @@ -30,6 +31,8 @@ class DataFrameReader(object): to access this. ::Note: Experimental + + .. versionadded:: 1.4 """ def __init__(self, sqlContext): @@ -40,6 +43,7 @@ def _df(self, jdf): from pyspark.sql.dataframe import DataFrame return DataFrame(jdf, self._sqlContext) + @since(1.4) def load(self, path=None, format=None, schema=None, **options): """Loads data from a data source and returns it as a :class`DataFrame`. @@ -63,6 +67,7 @@ def load(self, path=None, format=None, schema=None, **options): else: return self._df(jreader.load()) + @since(1.4) def json(self, path, schema=None): """ Loads a JSON file (one object per line) and returns the result as @@ -107,6 +112,7 @@ def json(self, path, schema=None): jdf = self._jreader.schema(jschema).json(path) return self._df(jdf) + @since(1.4) def table(self, tableName): """Returns the specified table as a :class:`DataFrame`. @@ -117,6 +123,7 @@ def table(self, tableName): """ return self._df(self._jreader.table(tableName)) + @since(1.4) def parquet(self, *path): """Loads a Parquet file, returning the result as a :class:`DataFrame`. @@ -130,6 +137,7 @@ def parquet(self, *path): """ return self._df(self._jreader.parquet(_to_seq(self._sqlContext._sc, path))) + @since(1.4) def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPartitions=None, predicates=None, properties={}): """ @@ -178,12 +186,15 @@ class DataFrameWriter(object): to access this. ::Note: Experimental + + .. versionadded:: 1.4 """ def __init__(self, df): self._df = df self._sqlContext = df.sql_ctx self._jwrite = df._jdf.write() + @since(1.4) def save(self, path=None, format=None, mode="error", **options): """ Saves the contents of the :class:`DataFrame` to a data source. @@ -215,6 +226,7 @@ def save(self, path=None, format=None, mode="error", **options): else: jwrite.save(path) + @since(1.4) def saveAsTable(self, name, format=None, mode="error", **options): """ Saves the contents of this :class:`DataFrame` to a data source as a table. 
@@ -243,6 +255,7 @@ def saveAsTable(self, name, format=None, mode="error", **options): jwrite = jwrite.option(k, options[k]) return jwrite.saveAsTable(name) + @since(1.4) def json(self, path, mode="error"): """ Saves the content of the :class:`DataFrame` in JSON format at the @@ -261,6 +274,7 @@ def json(self, path, mode="error"): """ return self._jwrite.mode(mode).json(path) + @since(1.4) def parquet(self, path, mode="error"): """ Saves the content of the :class:`DataFrame` in Parquet format at the @@ -279,6 +293,7 @@ def parquet(self, path, mode="error"): """ return self._jwrite.mode(mode).parquet(path) + @since(1.4) def jdbc(self, url, table, mode="error", properties={}): """ Saves the content of the :class:`DataFrame` to a external database table From 947ea1cf5f6986aa687631d6cf9f0fb974ee7caf Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Wed, 20 May 2015 23:38:58 -0700 Subject: [PATCH 293/320] [SPARK-7753] [MLLIB] Update KernelDensity API Update `KernelDensity` API to make it extensible to different kernels in the future. `bandwidth` is used instead of `standardDeviation`. The static `kernelDensity` method is removed from `Statistics`. The implementation is updated using BLAS, while the algorithm remains the same. sryza srowen Author: Xiangrui Meng <meng@databricks.com> Closes #6279 from mengxr/SPARK-7753 and squashes the following commits: 4cdfadc [Xiangrui Meng] add example code in the doc 767fd5a [Xiangrui Meng] update KernelDensity API --- .../spark/mllib/stat/KernelDensity.scala | 109 +++++++++++++----- .../apache/spark/mllib/stat/Statistics.scala | 14 --- .../spark/mllib/stat/KernelDensitySuite.scala | 7 +- 3 files changed, 82 insertions(+), 48 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala index 79747cc5d7d74..a6bfe26e1e4f5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala @@ -17,52 +17,101 @@ package org.apache.spark.mllib.stat +import com.github.fommil.netlib.BLAS.{getInstance => blas} + +import org.apache.spark.annotation.Experimental +import org.apache.spark.api.java.JavaRDD import org.apache.spark.rdd.RDD -private[stat] object KernelDensity { +/** + * :: Experimental :: + * Kernel density estimation. Given a sample from a population, estimate its probability density + * function at each of the given evaluation points using kernels. Only Gaussian kernel is supported. + * + * Scala example: + * + * {{{ + * val sample = sc.parallelize(Seq(0.0, 1.0, 4.0, 4.0)) + * val kd = new KernelDensity() + * .setSample(sample) + * .setBandwidth(3.0) + * val densities = kd.estimate(Array(-1.0, 2.0, 5.0)) + * }}} + */ +@Experimental +class KernelDensity extends Serializable { + + import KernelDensity._ + + /** Bandwidth of the kernel function. */ + private var bandwidth: Double = 1.0 + + /** A sample from a population. */ + private var sample: RDD[Double] = _ + /** - * Given a set of samples from a distribution, estimates its density at the set of given points. - * Uses a Gaussian kernel with the given standard deviation. + * Sets the bandwidth (standard deviation) of the Gaussian kernel (default: `1.0`). 
*/ - def estimate(samples: RDD[Double], standardDeviation: Double, - evaluationPoints: Array[Double]): Array[Double] = { - if (standardDeviation <= 0.0) { - throw new IllegalArgumentException("Standard deviation must be positive") - } + def setBandwidth(bandwidth: Double): this.type = { + require(bandwidth > 0, s"Bandwidth must be positive, but got $bandwidth.") + this.bandwidth = bandwidth + this + } - // This gets used in each Gaussian PDF computation, so compute it up front - val logStandardDeviationPlusHalfLog2Pi = - math.log(standardDeviation) + 0.5 * math.log(2 * math.Pi) + /** + * Sets the sample to use for density estimation. + */ + def setSample(sample: RDD[Double]): this.type = { + this.sample = sample + this + } + + /** + * Sets the sample to use for density estimation (for Java users). + */ + def setSample(sample: JavaRDD[java.lang.Double]): this.type = { + this.sample = sample.rdd.asInstanceOf[RDD[Double]] + this + } + + /** + * Estimates probability density function at the given array of points. + */ + def estimate(points: Array[Double]): Array[Double] = { + val sample = this.sample + val bandwidth = this.bandwidth + + require(sample != null, "Must set sample before calling estimate.") - val (points, count) = samples.aggregate((new Array[Double](evaluationPoints.length), 0))( + val n = points.length + // This gets used in each Gaussian PDF computation, so compute it up front + val logStandardDeviationPlusHalfLog2Pi = math.log(bandwidth) + 0.5 * math.log(2 * math.Pi) + val (densities, count) = sample.aggregate((new Array[Double](n), 0L))( (x, y) => { var i = 0 - while (i < evaluationPoints.length) { - x._1(i) += normPdf(y, standardDeviation, logStandardDeviationPlusHalfLog2Pi, - evaluationPoints(i)) + while (i < n) { + x._1(i) += normPdf(y, bandwidth, logStandardDeviationPlusHalfLog2Pi, points(i)) i += 1 } - (x._1, i) + (x._1, n) }, (x, y) => { - var i = 0 - while (i < evaluationPoints.length) { - x._1(i) += y._1(i) - i += 1 - } + blas.daxpy(n, 1.0, y._1, 1, x._1, 1) (x._1, x._2 + y._2) }) - - var i = 0 - while (i < points.length) { - points(i) /= count - i += 1 - } - points + blas.dscal(n, 1.0 / count, densities, 1) + densities } +} + +private object KernelDensity { - private def normPdf(mean: Double, standardDeviation: Double, - logStandardDeviationPlusHalfLog2Pi: Double, x: Double): Double = { + /** Evaluates the PDF of a normal distribution. */ + def normPdf( + mean: Double, + standardDeviation: Double, + logStandardDeviationPlusHalfLog2Pi: Double, + x: Double): Double = { val x0 = x - mean val x1 = x0 / standardDeviation val logDensity = -0.5 * x1 * x1 - logStandardDeviationPlusHalfLog2Pi diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala index 32561620ac914..b3fad0c52d655 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala @@ -149,18 +149,4 @@ object Statistics { def chiSqTest(data: RDD[LabeledPoint]): Array[ChiSqTestResult] = { ChiSqTest.chiSquaredFeatures(data) } - - /** - * Given an empirical distribution defined by the input RDD of samples, estimate its density at - * each of the given evaluation points using a Gaussian kernel. - * - * @param samples The samples RDD used to define the empirical distribution. - * @param standardDeviation The standard deviation of the kernel Gaussians. - * @param evaluationPoints The points at which to estimate densities. 
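// ---------------------------------------------------------------------------
// Editor's illustrative sketch -- not part of the patch. Migrating a caller
// from the removed static helper to the builder-style KernelDensity API; `sc`
// is assumed to be an existing SparkContext and the sample values are
// placeholders.
import org.apache.spark.SparkContext
import org.apache.spark.mllib.stat.KernelDensity

object KernelDensityMigration {
  def estimateDensities(sc: SparkContext): Array[Double] = {
    val sample = sc.parallelize(Seq(0.0, 1.0, 4.0, 4.0))
    // Before: Statistics.kernelDensity(sample, 3.0, Seq(-1.0, 2.0, 5.0))
    // After: bandwidth replaces standardDeviation and is set on the builder.
    new KernelDensity()
      .setSample(sample)
      .setBandwidth(3.0)
      .estimate(Array(-1.0, 2.0, 5.0))
  }
}
// ---------------------------------------------------------------------------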
- * @return An array the same size as evaluationPoints with the density at each point. - */ - def kernelDensity(samples: RDD[Double], standardDeviation: Double, - evaluationPoints: Iterable[Double]): Array[Double] = { - KernelDensity.estimate(samples, standardDeviation, evaluationPoints.toArray) - } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala index 16ecae23dd9d4..14bb1cebf0b8f 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala @@ -17,9 +17,8 @@ package org.apache.spark.mllib.stat -import org.scalatest.FunSuite - import org.apache.commons.math3.distribution.NormalDistribution +import org.scalatest.FunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext @@ -27,7 +26,7 @@ class KernelDensitySuite extends FunSuite with MLlibTestSparkContext { test("kernel density single sample") { val rdd = sc.parallelize(Array(5.0)) val evaluationPoints = Array(5.0, 6.0) - val densities = KernelDensity.estimate(rdd, 3.0, evaluationPoints) + val densities = new KernelDensity().setSample(rdd).setBandwidth(3.0).estimate(evaluationPoints) val normal = new NormalDistribution(5.0, 3.0) val acceptableErr = 1e-6 assert(densities(0) - normal.density(5.0) < acceptableErr) @@ -37,7 +36,7 @@ class KernelDensitySuite extends FunSuite with MLlibTestSparkContext { test("kernel density multiple samples") { val rdd = sc.parallelize(Array(5.0, 10.0)) val evaluationPoints = Array(5.0, 6.0) - val densities = KernelDensity.estimate(rdd, 3.0, evaluationPoints) + val densities = new KernelDensity().setSample(rdd).setBandwidth(3.0).estimate(evaluationPoints) val normal1 = new NormalDistribution(5.0, 3.0) val normal2 = new NormalDistribution(10.0, 3.0) val acceptableErr = 1e-6 From 1ee8eb431e04db16f95f0bcb3a546ad6e14b616f Mon Sep 17 00:00:00 2001 From: Burak Yavuz <brkyvz@gmail.com> Date: Thu, 21 May 2015 00:30:55 -0700 Subject: [PATCH 294/320] [SPARK-7745] Change asserts to requires for user input checks in Spark Streaming Assertions can be turned off. `require` throws an `IllegalArgumentException` which makes more sense when it's a user set variable. 
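To make the distinction concrete, here is a minimal standalone Scala sketch (not part of this patch; the method name and error message are illustrative): `assert` throws `AssertionError` and can be elided at compile time (e.g. with `scalac -Xdisable-assertions`), so a user-input check written with it may silently disappear, whereas `require` always runs and throws `IllegalArgumentException`, the conventional signal for a bad argument.

    object RequireVsAssert {
      // `assert` guards internal invariants and may be compiled away entirely;
      // `require` validates caller-supplied input and always throws
      // IllegalArgumentException when the predicate is false.
      def setBatchIntervalMillis(millis: Long): Long = {
        require(millis > 0, s"Batch interval must be positive, but got $millis")
        millis
      }

      def main(args: Array[String]): Unit = {
        setBatchIntervalMillis(500)   // fine
        setBatchIntervalMillis(-1)    // throws IllegalArgumentException at runtime
      }
    }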
Author: Burak Yavuz <brkyvz@gmail.com> Closes #6271 from brkyvz/streaming-require and squashes the following commits: d249484 [Burak Yavuz] fix merge conflict 264adb8 [Burak Yavuz] addressed comments v1.0 6161350 [Burak Yavuz] fix tests 16aa766 [Burak Yavuz] changed more assertions to more meaningful errors afd923d [Burak Yavuz] changed some assertions to require --- .../apache/spark/streaming/DStreamGraph.scala | 4 +- .../spark/streaming/StreamingContext.scala | 11 ++--- .../streaming/api/python/PythonDStream.scala | 4 +- .../spark/streaming/dstream/DStream.scala | 45 +++++++++---------- .../dstream/ReducedWindowedDStream.scala | 4 +- .../scheduler/ReceivedBlockTracker.scala | 2 +- .../streaming/StreamingContextSuite.scala | 6 +-- 7 files changed, 38 insertions(+), 38 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala index 85b354ff4aa0d..40789c66f3991 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala @@ -157,10 +157,10 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { def validate() { this.synchronized { - assert(batchDuration != null, "Batch duration has not been set") + require(batchDuration != null, "Batch duration has not been set") // assert(batchDuration >= Milliseconds(100), "Batch duration of " + batchDuration + // " is very low") - assert(getOutputStreams().size > 0, "No output streams registered, so nothing to execute") + require(getOutputStreams().size > 0, "No output operations registered, so nothing to execute") } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 95063692e1146..160fc42c57d18 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -156,7 +156,7 @@ class StreamingContext private[streaming] ( cp_.graph.restoreCheckpointData() cp_.graph } else { - assert(batchDur_ != null, "Batch duration for streaming context cannot be null") + require(batchDur_ != null, "Batch duration for StreamingContext cannot be null") val newGraph = new DStreamGraph() newGraph.setBatchDuration(batchDur_) newGraph @@ -462,7 +462,8 @@ class StreamingContext private[streaming] ( directory, FileInputDStream.defaultFilter : Path => Boolean, newFilesOnly=true, conf) val data = br.map { case (k, v) => val bytes = v.getBytes - assert(bytes.length == recordLength, "Byte array does not have correct length") + require(bytes.length == recordLength, "Byte array does not have correct length. " + + s"${bytes.length} did not equal recordLength: $recordLength") bytes } data @@ -568,7 +569,7 @@ class StreamingContext private[streaming] ( /** * Start the execution of the streams. * - * @throws SparkException if the StreamingContext is already stopped. + * @throws IllegalStateException if the StreamingContext is already stopped. 
*/ def start(): Unit = synchronized { state match { @@ -587,7 +588,7 @@ class StreamingContext private[streaming] ( case ACTIVE => logWarning("StreamingContext has already been started") case STOPPED => - throw new SparkException("StreamingContext has already been stopped") + throw new IllegalStateException("StreamingContext has already been stopped") } } @@ -689,7 +690,7 @@ object StreamingContext extends Logging { private def assertNoOtherContextIsActive(): Unit = { ACTIVATION_LOCK.synchronized { if (activeContext.get() != null) { - throw new SparkException( + throw new IllegalStateException( "Only one StreamingContext may be started in this JVM. " + "Currently running StreamingContext was started at" + activeContext.get.startSite.get.longForm) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala index 4c28654ef6413..d06401245ff17 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala @@ -109,7 +109,7 @@ private[python] object PythonTransformFunctionSerializer { } def serialize(func: PythonTransformFunction): Array[Byte] = { - assert(serializer != null, "Serializer has not been registered!") + require(serializer != null, "Serializer has not been registered!") // get the id of PythonTransformFunction in py4j val h = Proxy.getInvocationHandler(func.asInstanceOf[Proxy]) val f = h.getClass().getDeclaredField("id") @@ -119,7 +119,7 @@ private[python] object PythonTransformFunctionSerializer { } def deserialize(bytes: Array[Byte]): PythonTransformFunction = { - assert(serializer != null, "Serializer has not been registered!") + require(serializer != null, "Serializer has not been registered!") serializer.loads(bytes) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index 7c50a766a9bad..c858647c6406d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -217,53 +217,52 @@ abstract class DStream[T: ClassTag] ( case StreamingContextState.INITIALIZED => // good to go case StreamingContextState.ACTIVE => - throw new SparkException( + throw new IllegalStateException( "Adding new inputs, transformations, and output operations after " + "starting a context is not supported") case StreamingContextState.STOPPED => - throw new SparkException( + throw new IllegalStateException( "Adding new inputs, transformations, and output operations after " + "stopping a context is not supported") } } private[streaming] def validateAtStart() { - assert(rememberDuration != null, "Remember duration is set to null") + require(rememberDuration != null, "Remember duration is set to null") - assert( + require( !mustCheckpoint || checkpointDuration != null, "The checkpoint interval for " + this.getClass.getSimpleName + " has not been set." + " Please use DStream.checkpoint() to set the interval." ) - assert( + require( checkpointDuration == null || context.sparkContext.checkpointDir.isDefined, - "The checkpoint directory has not been set. Please use StreamingContext.checkpoint()" + - " or SparkContext.checkpoint() to set the checkpoint directory." + "The checkpoint directory has not been set. Please set it by StreamingContext.checkpoint()." 
) - assert( + require( checkpointDuration == null || checkpointDuration >= slideDuration, "The checkpoint interval for " + this.getClass.getSimpleName + " has been set to " + checkpointDuration + " which is lower than its slide time (" + slideDuration + "). " + "Please set it to at least " + slideDuration + "." ) - assert( + require( checkpointDuration == null || checkpointDuration.isMultipleOf(slideDuration), "The checkpoint interval for " + this.getClass.getSimpleName + " has been set to " + checkpointDuration + " which not a multiple of its slide time (" + slideDuration + "). " + - "Please set it to a multiple " + slideDuration + "." + "Please set it to a multiple of " + slideDuration + "." ) - assert( + require( checkpointDuration == null || storageLevel != StorageLevel.NONE, "" + this.getClass.getSimpleName + " has been marked for checkpointing but the storage " + "level has not been set to enable persisting. Please use DStream.persist() to set the " + "storage level to use memory for better checkpointing performance." ) - assert( + require( checkpointDuration == null || rememberDuration > checkpointDuration, "The remember duration for " + this.getClass.getSimpleName + " has been set to " + rememberDuration + " which is not more than the checkpoint interval (" + @@ -272,7 +271,7 @@ abstract class DStream[T: ClassTag] ( val metadataCleanerDelay = MetadataCleaner.getDelaySeconds(ssc.conf) logInfo("metadataCleanupDelay = " + metadataCleanerDelay) - assert( + require( metadataCleanerDelay < 0 || rememberDuration.milliseconds < metadataCleanerDelay * 1000, "It seems you are doing some DStream window operation or setting a checkpoint interval " + "which requires " + this.getClass.getSimpleName + " to remember generated RDDs for more " + @@ -633,8 +632,8 @@ abstract class DStream[T: ClassTag] ( * 'this' DStream will be registered as an output stream and therefore materialized. */ def foreachRDD(foreachFunc: (RDD[T], Time) => Unit): Unit = ssc.withScope { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean new ForEachDStream(this, context.sparkContext.clean(foreachFunc, false)).register() } @@ -644,8 +643,8 @@ abstract class DStream[T: ClassTag] ( * on each RDD of 'this' DStream. */ def transform[U: ClassTag](transformFunc: RDD[T] => RDD[U]): DStream[U] = ssc.withScope { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean val cleanedF = context.sparkContext.clean(transformFunc, false) transform((r: RDD[T], t: Time) => cleanedF(r)) @@ -656,8 +655,8 @@ abstract class DStream[T: ClassTag] ( * on each RDD of 'this' DStream. 
*/ def transform[U: ClassTag](transformFunc: (RDD[T], Time) => RDD[U]): DStream[U] = ssc.withScope { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean val cleanedF = context.sparkContext.clean(transformFunc, false) val realTransformFunc = (rdds: Seq[RDD[_]], time: Time) => { @@ -674,8 +673,8 @@ abstract class DStream[T: ClassTag] ( def transformWith[U: ClassTag, V: ClassTag]( other: DStream[U], transformFunc: (RDD[T], RDD[U]) => RDD[V] ): DStream[V] = ssc.withScope { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean val cleanedF = ssc.sparkContext.clean(transformFunc, false) transformWith(other, (rdd1: RDD[T], rdd2: RDD[U], time: Time) => cleanedF(rdd1, rdd2)) @@ -688,8 +687,8 @@ abstract class DStream[T: ClassTag] ( def transformWith[U: ClassTag, V: ClassTag]( other: DStream[U], transformFunc: (RDD[T], RDD[U], Time) => RDD[V] ): DStream[V] = ssc.withScope { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean val cleanedF = ssc.sparkContext.clean(transformFunc, false) val realTransformFunc = (rdds: Seq[RDD[_]], time: Time) => { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala index 1385ccbf56ee5..df9f7f140eddc 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala @@ -40,12 +40,12 @@ class ReducedWindowedDStream[K: ClassTag, V: ClassTag]( partitioner: Partitioner ) extends DStream[(K,V)](parent.ssc) { - assert(_windowDuration.isMultipleOf(parent.slideDuration), + require(_windowDuration.isMultipleOf(parent.slideDuration), "The window duration of ReducedWindowedDStream (" + _windowDuration + ") " + "must be multiple of the slide duration of parent DStream (" + parent.slideDuration + ")" ) - assert(_slideDuration.isMultipleOf(parent.slideDuration), + require(_slideDuration.isMultipleOf(parent.slideDuration), "The slide duration of ReducedWindowedDStream (" + _slideDuration + ") " + "must be multiple of the slide duration of parent DStream (" + parent.slideDuration + ")" ) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala index a9f4147a5f020..7720259a5d794 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala +++ 
b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala @@ -153,7 +153,7 @@ private[streaming] class ReceivedBlockTracker( * returns only after the files are cleaned up. */ def cleanupOldBatches(cleanupThreshTime: Time, waitForCompletion: Boolean): Unit = synchronized { - assert(cleanupThreshTime.milliseconds < clock.getTimeMillis()) + require(cleanupThreshTime.milliseconds < clock.getTimeMillis()) val timesToCleanup = timeToAllocatedBlocks.keys.filter { _ < cleanupThreshTime }.toSeq logInfo("Deleting batches " + timesToCleanup) writeToLog(BatchCleanupEvent(timesToCleanup)) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 3a958bf3a3c19..f8e8030791df1 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -182,7 +182,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() ssc.stop() - intercept[SparkException] { + intercept[IllegalStateException] { ssc.start() // start after stop should throw exception } assert(ssc.getState() === StreamingContextState.STOPPED) @@ -600,7 +600,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w val anotherInput = addInputStream(anotherSsc) anotherInput.foreachRDD { rdd => rdd.count } - val exception = intercept[SparkException] { + val exception = intercept[IllegalStateException] { anotherSsc.start() } assert(exception.getMessage.contains("StreamingContext"), "Did not get the right exception") @@ -623,7 +623,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w def testForException(clue: String, expectedErrorMsg: String)(body: => Unit): Unit = { withClue(clue) { - val ex = intercept[SparkException] { + val ex = intercept[IllegalStateException] { body } assert(ex.getMessage.toLowerCase().contains(expectedErrorMsg)) From feb3a9d3f81f19850fddbd9639823f59a60efa52 Mon Sep 17 00:00:00 2001 From: Cheng Hao <hao.cheng@intel.com> Date: Thu, 21 May 2015 09:28:00 -0700 Subject: [PATCH 295/320] [SPARK-7320] [SQL] [Minor] Move the testData into beforeAll() Follow up of #6340, to avoid the test report missing once it fails. 
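The idea, in a minimal standalone ScalaTest sketch (not part of this patch; names are illustrative): a fixture built eagerly as a constructor-time `val` can throw while the suite is being instantiated, in which case no per-test report is produced; building it in `beforeAll()` turns the same failure into an ordinary, reported setup failure.

    import org.scalatest.{BeforeAndAfterAll, FunSuite}

    class FixtureLifecycleSuite extends FunSuite with BeforeAndAfterAll {
      // Deferred initialization: a failure here surfaces as a suite setup
      // failure instead of aborting suite instantiation.
      private var testData: Seq[(Int, Int)] = _

      override def beforeAll(): Unit = {
        testData = Seq((1, 2), (2, 4))
      }

      test("fixture is initialized before tests run") {
        assert(testData.nonEmpty)
      }
    }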
Author: Cheng Hao <hao.cheng@intel.com> Closes #6312 from chenghao-intel/rollup_minor and squashes the following commits: b03a25f [Cheng Hao] simplify the testData instantiation 09b7e8b [Cheng Hao] move the testData into beforeAll() --- .../spark/sql/hive/HiveDataFrameAnalyticsSuite.scala | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala index 99de14660f676..fb10f8583da99 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala @@ -17,25 +17,21 @@ package org.apache.spark.sql.hive -import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.scalatest.BeforeAndAfterAll -case class TestData2Int(a: Int, b: Int) - // TODO ideally we should put the test suite into the package `sql`, as // `hive` package is optional in compiling, however, `SQLContext.sql` doesn't // support the `cube` or `rollup` yet. class HiveDataFrameAnalyticsSuite extends QueryTest with BeforeAndAfterAll { - val testData = - TestHive.sparkContext.parallelize( - TestData2Int(1, 2) :: - TestData2Int(2, 4) :: Nil).toDF() + private var testData: DataFrame = _ override def beforeAll() { + testData = Seq((1, 2), (2, 4)).toDF("a", "b") TestHive.registerDataFrameAsTable(testData, "mytable") } From a25c1ab8f04a4e19d82ff4c18a0b1689d8b3ddac Mon Sep 17 00:00:00 2001 From: Davies Liu <davies@databricks.com> Date: Thu, 21 May 2015 09:58:47 -0700 Subject: [PATCH 296/320] [SPARK-7565] [SQL] fix MapType in JsonRDD The key of Map in JsonRDD should be converted into UTF8String (also failed records), Thanks to yhuai viirya Closes #6084 Author: Davies Liu <davies@databricks.com> Closes #6299 from davies/string_in_json and squashes the following commits: 0dbf559 [Davies Liu] improve test, fix corrupt record 6836a80 [Davies Liu] move unit tests into Scala b97af11 [Davies Liu] fix MapType in JsonRDD --- .../apache/spark/sql/json/JacksonParser.scala | 8 +++--- .../org/apache/spark/sql/json/JsonRDD.scala | 16 +++++++---- .../org/apache/spark/sql/json/JsonSuite.scala | 28 ++++++++++++++++++- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala index 81611513582a8..0e223758051a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala @@ -150,10 +150,10 @@ private[sql] object JacksonParser { private def convertMap( factory: JsonFactory, parser: JsonParser, - valueType: DataType): Map[String, Any] = { - val builder = Map.newBuilder[String, Any] + valueType: DataType): Map[UTF8String, Any] = { + val builder = Map.newBuilder[UTF8String, Any] while (nextUntil(parser, JsonToken.END_OBJECT)) { - builder += parser.getCurrentName -> convertField(factory, parser, valueType) + builder += UTF8String(parser.getCurrentName) -> convertField(factory, parser, valueType) } builder.result() @@ -181,7 +181,7 @@ private[sql] object JacksonParser { val row = new 
GenericMutableRow(schema.length) for (corruptIndex <- schema.getFieldIndex(columnNameOfCorruptRecords)) { require(schema(corruptIndex).dataType == StringType) - row.update(corruptIndex, record) + row.update(corruptIndex, UTF8String(record)) } Seq(row) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala index 4c32710a17bc7..037a6d60a2ed6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala @@ -20,18 +20,18 @@ package org.apache.spark.sql.json import java.sql.Timestamp import scala.collection.Map -import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper} +import scala.collection.convert.Wrappers.{JListWrapper, JMapWrapper} -import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException} +import com.fasterxml.jackson.core.JsonProcessingException import com.fasterxml.jackson.databind.ObjectMapper +import org.apache.spark.Logging import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ -import org.apache.spark.Logging private[sql] object JsonRDD extends Logging { @@ -318,7 +318,8 @@ private[sql] object JsonRDD extends Logging { parsed } catch { - case e: JsonProcessingException => Map(columnNameOfCorruptRecords -> record) :: Nil + case e: JsonProcessingException => + Map(columnNameOfCorruptRecords -> UTF8String(record)) :: Nil } } }) @@ -422,7 +423,10 @@ private[sql] object JsonRDD extends Logging { value.asInstanceOf[Seq[Any]].map(enforceCorrectType(_, elementType)) case MapType(StringType, valueType, _) => val map = value.asInstanceOf[Map[String, Any]] - map.mapValues(enforceCorrectType(_, valueType)).map(identity) + map.map { + case (k, v) => + (UTF8String(k), enforceCorrectType(v, valueType)) + }.map(identity) case struct: StructType => asRow(value.asInstanceOf[Map[String, Any]], struct) case DateType => toDate(value) case TimestampType => toTimestamp(value) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala index 6f747e5846f74..7e6eeba17752a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala @@ -25,7 +25,6 @@ import org.scalactic.Tolerance._ import org.apache.spark.sql.TestData._ import org.apache.spark.sql.catalyst.util.DateUtils -import org.apache.spark.sql.functions._ import org.apache.spark.sql.json.InferSchema.compatibleType import org.apache.spark.sql.sources.LogicalRelation import org.apache.spark.sql.test.TestSQLContext @@ -1074,4 +1073,31 @@ class JsonSuite extends QueryTest { assert(StructType(Seq()) === emptySchema) } + test("SPARK-7565 MapType in JsonRDD") { + val useStreaming = getConf(SQLConf.USE_JACKSON_STREAMING_API, "true") + val oldColumnNameOfCorruptRecord = TestSQLContext.conf.columnNameOfCorruptRecord + TestSQLContext.setConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD, "_unparsed") + + val schemaWithSimpleMap = StructType( + StructField("map", MapType(StringType, IntegerType, true), false) :: Nil) + try{ + for (useStreaming <- List("true", "false")) { + setConf(SQLConf.USE_JACKSON_STREAMING_API, 
useStreaming) + val temp = Utils.createTempDir().getPath + + val df = read.schema(schemaWithSimpleMap).json(mapType1) + df.write.mode("overwrite").parquet(temp) + // order of MapType is not defined + assert(read.parquet(temp).count() == 5) + + val df2 = read.json(corruptRecords) + df2.write.mode("overwrite").parquet(temp) + checkAnswer(read.parquet(temp), df2.collect()) + } + } finally { + setConf(SQLConf.USE_JACKSON_STREAMING_API, useStreaming) + setConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD, oldColumnNameOfCorruptRecord) + } + } + } From 13348e21b6b1c0df42c18b82b86c613291228863 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 21 May 2015 10:30:08 -0700 Subject: [PATCH 297/320] [SPARK-7752] [MLLIB] Use lowercase letters for NaiveBayes.modelType to be consistent with other string names in MLlib. This PR also updates the implementation to use vals instead of hardcoded strings. jkbradley leahmcguire Author: Xiangrui Meng <meng@databricks.com> Closes #6277 from mengxr/SPARK-7752 and squashes the following commits: f38b662 [Xiangrui Meng] add another case _ back in test ae5c66a [Xiangrui Meng] model type -> modelType 711d1c6 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-7752 40ae53e [Xiangrui Meng] fix Java test suite 264a814 [Xiangrui Meng] add case _ back 3c456a8 [Xiangrui Meng] update NB user guide 17bba53 [Xiangrui Meng] update naive Bayes to use lowercase model type strings --- docs/mllib-naive-bayes.md | 9 ++- .../mllib/classification/NaiveBayes.scala | 75 +++++++++++-------- .../classification/JavaNaiveBayesSuite.java | 4 +- .../classification/NaiveBayesSuite.scala | 46 ++++++------ 4 files changed, 75 insertions(+), 59 deletions(-) diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 9780ea52c4994..56a2e9ca86bb1 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -21,7 +21,7 @@ Within that context, each observation is a document and each feature represents a term whose value is the frequency of the term (in multinomial naive Bayes) or a zero or one indicating whether the term was found in the document (in Bernoulli naive Bayes). Feature values must be nonnegative. The model type is selected with an optional parameter -"Multinomial" or "Bernoulli" with "Multinomial" as the default. +"multinomial" or "bernoulli" with "multinomial" as the default. [Additive smoothing](http://en.wikipedia.org/wiki/Lidstone_smoothing) can be used by setting the parameter $\lambda$ (default to $1.0$). For document classification, the input feature vectors are usually sparse, and sparse vectors should be supplied as input to take advantage of @@ -35,7 +35,7 @@ sparsity. Since the training data is only used once, it is not necessary to cach [NaiveBayes](api/scala/index.html#org.apache.spark.mllib.classification.NaiveBayes$) implements multinomial naive Bayes. It takes an RDD of [LabeledPoint](api/scala/index.html#org.apache.spark.mllib.regression.LabeledPoint) and an optional -smoothing parameter `lambda` as input, an optional model type parameter (default is Multinomial), and outputs a +smoothing parameter `lambda` as input, an optional model type parameter (default is "multinomial"), and outputs a [NaiveBayesModel](api/scala/index.html#org.apache.spark.mllib.classification.NaiveBayesModel), which can be used for evaluation and prediction. 
@@ -54,7 +54,7 @@ val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L) val training = splits(0) val test = splits(1) -val model = NaiveBayes.train(training, lambda = 1.0, model = "Multinomial") +val model = NaiveBayes.train(training, lambda = 1.0, model = "multinomial") val predictionAndLabel = test.map(p => (model.predict(p.features), p.label)) val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count() @@ -75,6 +75,8 @@ optionally smoothing parameter `lambda` as input, and output a can be used for evaluation and prediction. {% highlight java %} +import scala.Tuple2; + import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.Function; @@ -82,7 +84,6 @@ import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.mllib.classification.NaiveBayes; import org.apache.spark.mllib.classification.NaiveBayesModel; import org.apache.spark.mllib.regression.LabeledPoint; -import scala.Tuple2; JavaRDD<LabeledPoint> training = ... // training set JavaRDD<LabeledPoint> test = ... // test set diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index cffe9ef1e0b2a..f51ee36d0dfcb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -25,13 +25,12 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{Logging, SparkContext, SparkException} -import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, DenseVector, SparseVector, Vector, Vectors} +import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, DenseVector, SparseVector, Vector} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SQLContext} - /** * Model for Naive Bayes Classifiers. * @@ -39,7 +38,7 @@ import org.apache.spark.sql.{DataFrame, SQLContext} * @param pi log of class priors, whose dimension is C, number of labels * @param theta log of class conditional probabilities, whose dimension is C-by-D, * where D is number of features - * @param modelType The type of NB model to fit can be "Multinomial" or "Bernoulli" + * @param modelType The type of NB model to fit can be "multinomial" or "bernoulli" */ class NaiveBayesModel private[mllib] ( val labels: Array[Double], @@ -48,11 +47,13 @@ class NaiveBayesModel private[mllib] ( val modelType: String) extends ClassificationModel with Serializable with Saveable { + import NaiveBayes.{Bernoulli, Multinomial, supportedModelTypes} + private val piVector = new DenseVector(pi) - private val thetaMatrix = new DenseMatrix(labels.size, theta(0).size, theta.flatten, true) + private val thetaMatrix = new DenseMatrix(labels.length, theta(0).length, theta.flatten, true) private[mllib] def this(labels: Array[Double], pi: Array[Double], theta: Array[Array[Double]]) = - this(labels, pi, theta, "Multinomial") + this(labels, pi, theta, NaiveBayes.Multinomial) /** A Java-friendly constructor that takes three Iterable parameters. 
*/ private[mllib] def this( @@ -61,12 +62,15 @@ class NaiveBayesModel private[mllib] ( theta: JIterable[JIterable[Double]]) = this(labels.asScala.toArray, pi.asScala.toArray, theta.asScala.toArray.map(_.asScala.toArray)) + require(supportedModelTypes.contains(modelType), + s"Invalid modelType $modelType. Supported modelTypes are $supportedModelTypes.") + // Bernoulli scoring requires log(condprob) if 1, log(1-condprob) if 0. // This precomputes log(1.0 - exp(theta)) and its sum which are used for the linear algebra // application of this condition (in predict function). private val (thetaMinusNegTheta, negThetaSum) = modelType match { - case "Multinomial" => (None, None) - case "Bernoulli" => + case Multinomial => (None, None) + case Bernoulli => val negTheta = thetaMatrix.map(value => math.log(1.0 - math.exp(value))) val ones = new DenseVector(Array.fill(thetaMatrix.numCols){1.0}) val thetaMinusNegTheta = thetaMatrix.map { value => @@ -75,7 +79,7 @@ class NaiveBayesModel private[mllib] ( (Option(thetaMinusNegTheta), Option(negTheta.multiply(ones))) case _ => // This should never happen. - throw new UnknownError(s"NaiveBayesModel was created with an unknown ModelType: $modelType") + throw new UnknownError(s"Invalid modelType: $modelType.") } override def predict(testData: RDD[Vector]): RDD[Double] = { @@ -88,15 +92,15 @@ class NaiveBayesModel private[mllib] ( override def predict(testData: Vector): Double = { modelType match { - case "Multinomial" => + case Multinomial => val prob = thetaMatrix.multiply(testData) BLAS.axpy(1.0, piVector, prob) labels(prob.argmax) - case "Bernoulli" => + case Bernoulli => testData.foreachActive { (index, value) => if (value != 0.0 && value != 1.0) { throw new SparkException( - s"Bernoulli Naive Bayes requires 0 or 1 feature values but found $testData.") + s"Bernoulli naive Bayes requires 0 or 1 feature values but found $testData.") } } val prob = thetaMinusNegTheta.get.multiply(testData) @@ -105,7 +109,7 @@ class NaiveBayesModel private[mllib] ( labels(prob.argmax) case _ => // This should never happen. - throw new UnknownError(s"NaiveBayesModel was created with an unknown ModelType: $modelType") + throw new UnknownError(s"Invalid modelType: $modelType.") } } @@ -230,16 +234,16 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { s"($loadedClassName, $version). 
Supported:\n" + s" ($classNameV1_0, 1.0)") } - assert(model.pi.size == numClasses, + assert(model.pi.length == numClasses, s"NaiveBayesModel.load expected $numClasses classes," + - s" but class priors vector pi had ${model.pi.size} elements") - assert(model.theta.size == numClasses, + s" but class priors vector pi had ${model.pi.length} elements") + assert(model.theta.length == numClasses, s"NaiveBayesModel.load expected $numClasses classes," + - s" but class conditionals array theta had ${model.theta.size} elements") - assert(model.theta.forall(_.size == numFeatures), + s" but class conditionals array theta had ${model.theta.length} elements") + assert(model.theta.forall(_.length == numFeatures), s"NaiveBayesModel.load expected $numFeatures features," + s" but class conditionals array theta had elements of size:" + - s" ${model.theta.map(_.size).mkString(",")}") + s" ${model.theta.map(_.length).mkString(",")}") model } } @@ -257,9 +261,11 @@ class NaiveBayes private ( private var lambda: Double, private var modelType: String) extends Serializable with Logging { - def this(lambda: Double) = this(lambda, "Multinomial") + import NaiveBayes.{Bernoulli, Multinomial} - def this() = this(1.0, "Multinomial") + def this(lambda: Double) = this(lambda, NaiveBayes.Multinomial) + + def this() = this(1.0, NaiveBayes.Multinomial) /** Set the smoothing parameter. Default: 1.0. */ def setLambda(lambda: Double): NaiveBayes = { @@ -272,12 +278,11 @@ class NaiveBayes private ( /** * Set the model type using a string (case-sensitive). - * Supported options: "Multinomial" and "Bernoulli". - * (default: Multinomial) + * Supported options: "multinomial" (default) and "bernoulli". */ - def setModelType(modelType:String): NaiveBayes = { + def setModelType(modelType: String): NaiveBayes = { require(NaiveBayes.supportedModelTypes.contains(modelType), - s"NaiveBayes was created with an unknown ModelType: $modelType") + s"NaiveBayes was created with an unknown modelType: $modelType.") this.modelType = modelType this } @@ -308,7 +313,7 @@ class NaiveBayes private ( } if (!values.forall(v => v == 0.0 || v == 1.0)) { throw new SparkException( - s"Bernoulli Naive Bayes requires 0 or 1 feature values but found $v.") + s"Bernoulli naive Bayes requires 0 or 1 feature values but found $v.") } } @@ -317,7 +322,7 @@ class NaiveBayes private ( // TODO: similar to reduceByKeyLocally to save one stage. val aggregated = data.map(p => (p.label, p.features)).combineByKey[(Long, DenseVector)]( createCombiner = (v: Vector) => { - if (modelType == "Bernoulli") { + if (modelType == Bernoulli) { requireZeroOneBernoulliValues(v) } else { requireNonnegativeValues(v) @@ -352,11 +357,11 @@ class NaiveBayes private ( labels(i) = label pi(i) = math.log(n + lambda) - piLogDenom val thetaLogDenom = modelType match { - case "Multinomial" => math.log(sumTermFreqs.values.sum + numFeatures * lambda) - case "Bernoulli" => math.log(n + 2.0 * lambda) + case Multinomial => math.log(sumTermFreqs.values.sum + numFeatures * lambda) + case Bernoulli => math.log(n + 2.0 * lambda) case _ => // This should never happen. - throw new UnknownError(s"NaiveBayes was created with an unknown ModelType: $modelType") + throw new UnknownError(s"Invalid modelType: $modelType.") } var j = 0 while (j < numFeatures) { @@ -375,8 +380,14 @@ class NaiveBayes private ( */ object NaiveBayes { + /** String name for multinomial model type. */ + private[classification] val Multinomial: String = "multinomial" + + /** String name for Bernoulli model type. 
*/ + private[classification] val Bernoulli: String = "bernoulli" + /* Set of modelTypes that NaiveBayes supports */ - private[mllib] val supportedModelTypes = Set("Multinomial", "Bernoulli") + private[classification] val supportedModelTypes = Set(Multinomial, Bernoulli) /** * Trains a Naive Bayes model given an RDD of `(label, features)` pairs. @@ -406,7 +417,7 @@ object NaiveBayes { * @param lambda The smoothing parameter */ def train(input: RDD[LabeledPoint], lambda: Double): NaiveBayesModel = { - new NaiveBayes(lambda, "Multinomial").run(input) + new NaiveBayes(lambda, Multinomial).run(input) } /** @@ -429,7 +440,7 @@ object NaiveBayes { */ def train(input: RDD[LabeledPoint], lambda: Double, modelType: String): NaiveBayesModel = { require(supportedModelTypes.contains(modelType), - s"NaiveBayes was created with an unknown ModelType: $modelType") + s"NaiveBayes was created with an unknown modelType: $modelType.") new NaiveBayes(lambda, modelType).run(input) } diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java index 71fb7f13c39c2..3771c0ea7ad83 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java @@ -108,7 +108,7 @@ public Vector call(LabeledPoint v) throws Exception { @Test public void testModelTypeSetters() { NaiveBayes nb = new NaiveBayes() - .setModelType("Bernoulli") - .setModelType("Multinomial"); + .setModelType("bernoulli") + .setModelType("multinomial"); } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala index 40a79a1f19bd9..c111a78a55806 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala @@ -19,9 +19,8 @@ package org.apache.spark.mllib.classification import scala.util.Random -import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum, Axis} +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum} import breeze.stats.distributions.{Multinomial => BrzMultinomial} - import org.scalatest.FunSuite import org.apache.spark.SparkException @@ -30,9 +29,10 @@ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} import org.apache.spark.util.Utils - object NaiveBayesSuite { + import NaiveBayes.{Multinomial, Bernoulli} + private def calcLabel(p: Double, pi: Array[Double]): Int = { var sum = 0.0 for (j <- 0 until pi.length) { @@ -48,7 +48,7 @@ object NaiveBayesSuite { theta: Array[Array[Double]], // CXD nPoints: Int, seed: Int, - modelType: String = "Multinomial", + modelType: String = Multinomial, sample: Int = 10): Seq[LabeledPoint] = { val D = theta(0).length val rnd = new Random(seed) @@ -58,10 +58,10 @@ object NaiveBayesSuite { for (i <- 0 until nPoints) yield { val y = calcLabel(rnd.nextDouble(), _pi) val xi = modelType match { - case "Bernoulli" => Array.tabulate[Double] (D) { j => + case Bernoulli => Array.tabulate[Double] (D) { j => if (rnd.nextDouble () < _theta(y)(j) ) 1 else 0 } - case "Multinomial" => + case Multinomial => val mult = BrzMultinomial(BDV(_theta(y))) val emptyMap = (0 until D).map(x => (x, 
0.0)).toMap val counts = emptyMap ++ mult.sample(sample).groupBy(x => x).map { @@ -70,7 +70,7 @@ object NaiveBayesSuite { counts.toArray.sortBy(_._1).map(_._2) case _ => // This should never happen. - throw new UnknownError(s"NaiveBayesSuite found unknown ModelType: $modelType") + throw new UnknownError(s"Invalid modelType: $modelType.") } LabeledPoint(y, Vectors.dense(xi)) @@ -79,17 +79,17 @@ object NaiveBayesSuite { /** Bernoulli NaiveBayes with binary labels, 3 features */ private val binaryBernoulliModel = new NaiveBayesModel(labels = Array(0.0, 1.0), - pi = Array(0.2, 0.8), theta = Array(Array(0.1, 0.3, 0.6), Array(0.2, 0.4, 0.4)), - "Bernoulli") + pi = Array(0.2, 0.8), theta = Array(Array(0.1, 0.3, 0.6), Array(0.2, 0.4, 0.4)), Bernoulli) /** Multinomial NaiveBayes with binary labels, 3 features */ private val binaryMultinomialModel = new NaiveBayesModel(labels = Array(0.0, 1.0), - pi = Array(0.2, 0.8), theta = Array(Array(0.1, 0.3, 0.6), Array(0.2, 0.4, 0.4)), - "Multinomial") + pi = Array(0.2, 0.8), theta = Array(Array(0.1, 0.3, 0.6), Array(0.2, 0.4, 0.4)), Multinomial) } class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { + import NaiveBayes.{Multinomial, Bernoulli} + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { val numOfPredictions = predictions.zip(input).count { case (prediction, expected) => @@ -117,6 +117,11 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { } } + test("model types") { + assert(Multinomial === "multinomial") + assert(Bernoulli === "bernoulli") + } + test("get, set params") { val nb = new NaiveBayes() nb.setLambda(2.0) @@ -134,16 +139,15 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { Array(0.10, 0.10, 0.70, 0.10) // label 2 ).map(_.map(math.log)) - val testData = NaiveBayesSuite.generateNaiveBayesInput( - pi, theta, nPoints, 42, "Multinomial") + val testData = NaiveBayesSuite.generateNaiveBayesInput(pi, theta, nPoints, 42, Multinomial) val testRDD = sc.parallelize(testData, 2) testRDD.cache() - val model = NaiveBayes.train(testRDD, 1.0, "Multinomial") + val model = NaiveBayes.train(testRDD, 1.0, Multinomial) validateModelFit(pi, theta, model) val validationData = NaiveBayesSuite.generateNaiveBayesInput( - pi, theta, nPoints, 17, "Multinomial") + pi, theta, nPoints, 17, Multinomial) val validationRDD = sc.parallelize(validationData, 2) // Test prediction on RDD. @@ -163,15 +167,15 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { ).map(_.map(math.log)) val testData = NaiveBayesSuite.generateNaiveBayesInput( - pi, theta, nPoints, 45, "Bernoulli") + pi, theta, nPoints, 45, Bernoulli) val testRDD = sc.parallelize(testData, 2) testRDD.cache() - val model = NaiveBayes.train(testRDD, 1.0, "Bernoulli") + val model = NaiveBayes.train(testRDD, 1.0, Bernoulli) validateModelFit(pi, theta, model) val validationData = NaiveBayesSuite.generateNaiveBayesInput( - pi, theta, nPoints, 20, "Bernoulli") + pi, theta, nPoints, 20, Bernoulli) val validationRDD = sc.parallelize(validationData, 2) // Test prediction on RDD. 
@@ -216,7 +220,7 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { LabeledPoint(1.0, Vectors.dense(0.0))) intercept[SparkException] { - NaiveBayes.train(sc.makeRDD(badTrain, 2), 1.0, "Bernoulli") + NaiveBayes.train(sc.makeRDD(badTrain, 2), 1.0, Bernoulli) } val okTrain = Seq( @@ -235,7 +239,7 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { Vectors.dense(1.0), Vectors.dense(0.0)) - val model = NaiveBayes.train(sc.makeRDD(okTrain, 2), 1.0, "Bernoulli") + val model = NaiveBayes.train(sc.makeRDD(okTrain, 2), 1.0, Bernoulli) intercept[SparkException] { model.predict(sc.makeRDD(badPredict, 2)).collect() } @@ -275,7 +279,7 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { assert(model.labels === sameModel.labels) assert(model.pi === sameModel.pi) assert(model.theta === sameModel.theta) - assert(model.modelType === "Multinomial") + assert(model.modelType === Multinomial) } finally { Utils.deleteRecursively(tempDir) } From 8730fbb47b09fcf955fe16dd03b75596db6d53b6 Mon Sep 17 00:00:00 2001 From: Cheng Lian <lian@databricks.com> Date: Thu, 21 May 2015 10:56:17 -0700 Subject: [PATCH 298/320] [SPARK-7749] [SQL] Fixes partition discovery for non-partitioned tables When no partition columns can be found, we should have an empty `PartitionSpec`, rather than a `PartitionSpec` with empty partition columns. This PR together with #6285 should fix SPARK-7749. Author: Cheng Lian <lian@databricks.com> Author: Yin Huai <yhuai@databricks.com> Closes #6287 from liancheng/spark-7749 and squashes the following commits: a799ff3 [Cheng Lian] Adds test cases for SPARK-7749 c4949be [Cheng Lian] Minor refactoring, and tolerant _TEMPORARY directory name 5aa87ea [Yin Huai] Make parsePartitions more robust. fc56656 [Cheng Lian] Returns empty PartitionSpec if no partition columns can be inferred 19ae41e [Cheng Lian] Don't list base directory as leaf directory --- .../spark/sql/sources/PartitioningUtils.scala | 84 +++++++++++++------ .../apache/spark/sql/sources/interfaces.scala | 7 +- .../ParquetPartitionDiscoverySuite.scala | 49 +++++++++-- .../apache/spark/sql/hive/parquetSuites.scala | 51 ++++++++++- 4 files changed, 150 insertions(+), 41 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala index 8f8138d6ebebc..e0ead23d786f9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala @@ -33,6 +33,10 @@ private[sql] case class Partition(values: Row, path: String) private[sql] case class PartitionSpec(partitionColumns: StructType, partitions: Seq[Partition]) +private[sql] object PartitionSpec { + val emptySpec = PartitionSpec(StructType(Seq.empty[StructField]), Seq.empty[Partition]) +} + private[sql] object PartitioningUtils { // This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since sql/core doesn't // depend on Hive. 
@@ -68,20 +72,37 @@ private[sql] object PartitioningUtils { private[sql] def parsePartitions( paths: Seq[Path], defaultPartitionName: String): PartitionSpec = { - val partitionValues = resolvePartitions(paths.flatMap(parsePartition(_, defaultPartitionName))) - val fields = { - val (PartitionValues(columnNames, literals)) = partitionValues.head - columnNames.zip(literals).map { case (name, Literal(_, dataType)) => - StructField(name, dataType, nullable = true) - } + // First, we need to parse every partition's path and see if we can find partition values. + val pathsWithPartitionValues = paths.flatMap { path => + parsePartition(path, defaultPartitionName).map(path -> _) } - val partitions = partitionValues.zip(paths).map { - case (PartitionValues(_, literals), path) => - Partition(Row(literals.map(_.value): _*), path.toString) - } + if (pathsWithPartitionValues.isEmpty) { + // This dataset is not partitioned. + PartitionSpec.emptySpec + } else { + // This dataset is partitioned. We need to check whether all partitions have the same + // partition columns and resolve potential type conflicts. + val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues.map(_._2)) + + // Creates the StructType which represents the partition columns. + val fields = { + val PartitionValues(columnNames, literals) = resolvedPartitionValues.head + columnNames.zip(literals).map { case (name, Literal(_, dataType)) => + // We always assume partition columns are nullable since we've no idea whether null values + // will be appended in the future. + StructField(name, dataType, nullable = true) + } + } + + // Finally, we create `Partition`s based on paths and resolved partition values. + val partitions = resolvedPartitionValues.zip(pathsWithPartitionValues).map { + case (PartitionValues(_, literals), (path, _)) => + Partition(Row.fromSeq(literals.map(_.value)), path.toString) + } - PartitionSpec(StructType(fields), partitions) + PartitionSpec(StructType(fields), partitions) + } } /** @@ -111,7 +132,7 @@ private[sql] object PartitioningUtils { while (!finished) { // Sometimes (e.g., when speculative task is enabled), temporary directories may be left // uncleaned. Here we simply ignore them. 
- if (chopped.getName == "_temporary") { + if (chopped.getName.toLowerCase == "_temporary") { return None } @@ -121,8 +142,12 @@ private[sql] object PartitioningUtils { finished = maybeColumn.isEmpty || chopped.getParent == null } - val (columnNames, values) = columns.reverse.unzip - Some(PartitionValues(columnNames, values)) + if (columns.isEmpty) { + None + } else { + val (columnNames, values) = columns.reverse.unzip + Some(PartitionValues(columnNames, values)) + } } private def parsePartitionColumn( @@ -156,20 +181,25 @@ private[sql] object PartitioningUtils { private[sql] def resolvePartitions(values: Seq[PartitionValues]): Seq[PartitionValues] = { // Column names of all partitions must match val distinctPartitionsColNames = values.map(_.columnNames).distinct - assert(distinctPartitionsColNames.size == 1, { - val list = distinctPartitionsColNames.mkString("\t", "\n", "") - s"Conflicting partition column names detected:\n$list" - }) - - // Resolves possible type conflicts for each column - val columnCount = values.head.columnNames.size - val resolvedValues = (0 until columnCount).map { i => - resolveTypeConflicts(values.map(_.literals(i))) - } - // Fills resolved literals back to each partition - values.zipWithIndex.map { case (d, index) => - d.copy(literals = resolvedValues.map(_(index))) + if (distinctPartitionsColNames.isEmpty) { + Seq.empty + } else { + assert(distinctPartitionsColNames.size == 1, { + val list = distinctPartitionsColNames.mkString("\t", "\n", "") + s"Conflicting partition column names detected:\n$list" + }) + + // Resolves possible type conflicts for each column + val columnCount = values.head.columnNames.size + val resolvedValues = (0 until columnCount).map { i => + resolveTypeConflicts(values.map(_.literals(i))) + } + + // Fills resolved literals back to each partition + values.zipWithIndex.map { case (d, index) => + d.copy(literals = resolvedValues.map(_(index))) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 6a917bf38b139..fcbac0d457950 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -462,12 +462,7 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio private def discoverPartitions(): PartitionSpec = { val leafDirs = fileStatusCache.leafDirs.keys.toSeq - - if (leafDirs.nonEmpty) { - PartitioningUtils.parsePartitions(leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME) - } else { - PartitionSpec(StructType(Array.empty[StructField]), Array.empty[Partition]) - } + PartitioningUtils.parsePartitions(leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala index 1927114b8d58f..907dbb0119b40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala @@ -22,7 +22,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.sources.PartitioningUtils._ -import org.apache.spark.sql.sources.{Partition, PartitionSpec} +import org.apache.spark.sql.sources.{LogicalRelation, Partition, PartitionSpec} import 
org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.types._ import org.apache.spark.sql.{QueryTest, Row, SQLContext} @@ -66,12 +66,6 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { assert(message.contains(expected)) } - check("file:///", Some { - PartitionValues( - ArrayBuffer.empty[String], - ArrayBuffer.empty[Literal]) - }) - check("file://path/a=10", Some { PartitionValues( ArrayBuffer("a"), @@ -93,6 +87,10 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { ArrayBuffer(Literal.create(1.5, FloatType))) }) + check("file:///", None) + check("file:///path/_temporary", None) + check("file:///path/_temporary/c=1.5", None) + check("file:///path/_temporary/path", None) check("file://path/a=10/_temporary/c=1.5", None) check("file://path/a=10/c=1.5/_temporary", None) @@ -124,6 +122,25 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"), Partition(Row(10.5, "hello"), "hdfs://host:9000/path/a=10.5/b=hello")))) + check(Seq( + "hdfs://host:9000/path/_temporary", + "hdfs://host:9000/path/a=10/b=20", + "hdfs://host:9000/path/a=10.5/b=hello", + "hdfs://host:9000/path/a=10.5/_temporary", + "hdfs://host:9000/path/a=10.5/_TeMpOrArY", + "hdfs://host:9000/path/a=10.5/b=hello/_temporary", + "hdfs://host:9000/path/a=10.5/b=hello/_TEMPORARY", + "hdfs://host:9000/path/_temporary/path", + "hdfs://host:9000/path/a=11/_temporary/path", + "hdfs://host:9000/path/a=10.5/b=world/_temporary/path"), + PartitionSpec( + StructType(Seq( + StructField("a", FloatType), + StructField("b", StringType))), + Seq( + Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"), + Partition(Row(10.5, "hello"), "hdfs://host:9000/path/a=10.5/b=hello")))) + check(Seq( s"hdfs://host:9000/path/a=10/b=20", s"hdfs://host:9000/path/a=$defaultPartitionName/b=hello"), @@ -145,6 +162,11 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { Seq( Partition(Row(10, null), s"hdfs://host:9000/path/a=10/b=$defaultPartitionName"), Partition(Row(10.5, null), s"hdfs://host:9000/path/a=10.5/b=$defaultPartitionName")))) + + check(Seq( + s"hdfs://host:9000/path1", + s"hdfs://host:9000/path2"), + PartitionSpec.emptySpec) } test("read partitioned table - normal case") { @@ -334,4 +356,17 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { } } } + + test("SPARK-7749 Non-partitioned table should have empty partition spec") { + withTempPath { dir => + (1 to 10).map(i => (i, i.toString)).toDF("a", "b").write.parquet(dir.getCanonicalPath) + val queryExecution = read.parquet(dir.getCanonicalPath).queryExecution + queryExecution.analyzed.collectFirst { + case LogicalRelation(relation: ParquetRelation2) => + assert(relation.partitionSpec === PartitionSpec.emptySpec) + }.getOrElse { + fail(s"Expecting a ParquetRelation2, but got:\n$queryExecution") + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index 05d99983b6a63..1da990bc959ba 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -22,6 +22,7 @@ import java.io.File import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.{ExecutedCommand, PhysicalRDD} 
import org.apache.spark.sql.hive.execution.HiveTableScan import org.apache.spark.sql.hive.test.TestHive._ @@ -29,7 +30,7 @@ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.parquet.{ParquetRelation2, ParquetTableScan} import org.apache.spark.sql.sources.{InsertIntoDataSource, InsertIntoHadoopFsRelation, LogicalRelation} import org.apache.spark.sql.types._ -import org.apache.spark.sql.{QueryTest, SQLConf, SaveMode} +import org.apache.spark.sql.{DataFrame, QueryTest, SQLConf, SaveMode} import org.apache.spark.util.Utils // The data where the partitioning key exists only in the directory structure. @@ -385,6 +386,54 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { sql("DROP TABLE ms_convert") } + def collectParquetRelation(df: DataFrame): ParquetRelation2 = { + val plan = df.queryExecution.analyzed + plan.collectFirst { + case LogicalRelation(r: ParquetRelation2) => r + }.getOrElse { + fail(s"Expecting a ParquetRelation2, but got:\n$plan") + } + } + + test("SPARK-7749: non-partitioned metastore Parquet table lookup should use cached relation") { + sql( + s"""CREATE TABLE nonPartitioned ( + | key INT, + | value STRING + |) + |STORED AS PARQUET + """.stripMargin) + + // First lookup fills the cache + val r1 = collectParquetRelation(table("nonPartitioned")) + // Second lookup should reuse the cache + val r2 = collectParquetRelation(table("nonPartitioned")) + // They should be the same instance + assert(r1 eq r2) + + sql("DROP TABLE nonPartitioned") + } + + test("SPARK-7749: partitioned metastore Parquet table lookup should use cached relation") { + sql( + s"""CREATE TABLE partitioned ( + | key INT, + | value STRING + |) + |PARTITIONED BY (part INT) + |STORED AS PARQUET + """.stripMargin) + + // First lookup fills the cache + val r1 = collectParquetRelation(table("partitioned")) + // Second lookup should reuse the cache + val r2 = collectParquetRelation(table("partitioned")) + // They should be the same instance + assert(r1 eq r2) + + sql("DROP TABLE partitioned") + } + test("Caching converted data source Parquet Relations") { def checkCached(tableIdentifer: catalog.QualifiedTableName): Unit = { // Converted test_parquet should be cached. From 4b7ff3092c53827817079e0810563cbb0b9d0747 Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Thu, 21 May 2015 11:39:32 -0700 Subject: [PATCH 299/320] [SPARK-7787] [STREAMING] Fix serialization issue of SerializableAWSCredentials Lack of default constructor causes deserialization to fail. This occurs only when the AWS credentials are explicitly specified through KinesisUtils. 
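The root cause is a general Java-serialization rule, sketched below in standalone Scala (not Spark or AWS SDK code; class names are illustrative): when a `Serializable` class extends a non-serializable parent, deserialization has to invoke the parent's no-arg constructor, and `BasicAWSCredentials` does not have one. Implementing the `AWSCredentials` interface directly, as this patch does, avoids that requirement.

    import java.io._

    // Stand-in for a non-serializable parent without a no-arg constructor.
    class Parent(val key: String)

    // Serializable child of that parent: deserializing it fails with
    // java.io.InvalidClassException ("no valid constructor").
    class Child(key: String) extends Parent(key) with Serializable

    object SerializationSketch {
      def roundTrip(obj: AnyRef): AnyRef = {
        val buffer = new ByteArrayOutputStream()
        val out = new ObjectOutputStream(buffer)
        out.writeObject(obj)
        out.close()
        val in = new ObjectInputStream(new ByteArrayInputStream(buffer.toByteArray))
        try in.readObject() finally in.close()
      }

      def main(args: Array[String]): Unit = {
        try roundTrip(new Child("x"))
        catch { case e: InvalidClassException => println(s"Deserialization failed: $e") }
      }
    }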
Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6316 from tdas/SPARK-7787 and squashes the following commits: 248ca5c [Tathagata Das] Fixed serializability --- .../streaming/kinesis/KinesisReceiver.scala | 5 +++- .../kinesis/KinesisReceiverSuite.scala | 30 ++++++++----------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala index 90164490efb2e..800202e9fb86a 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala @@ -31,7 +31,10 @@ import org.apache.spark.util.Utils private[kinesis] case class SerializableAWSCredentials(accessKeyId: String, secretKey: String) - extends BasicAWSCredentials(accessKeyId, secretKey) with Serializable + extends AWSCredentials { + override def getAWSAccessKeyId: String = accessKeyId + override def getAWSSecretKey: String = secretKey +} /** * Custom AWS Kinesis-specific implementation of Spark Streaming's Receiver. diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index 7c17ee9dceddd..cd19c33b90050 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -20,27 +20,18 @@ import java.nio.ByteBuffer import scala.collection.JavaConversions.seqAsJavaList -import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.Milliseconds -import org.apache.spark.streaming.Seconds -import org.apache.spark.streaming.StreamingContext -import org.apache.spark.streaming.TestSuiteBase -import org.apache.spark.util.{ManualClock, Clock} - -import org.mockito.Mockito._ -import org.scalatest.BeforeAndAfter -import org.scalatest.Matchers -import org.scalatest.mock.MockitoSugar - -import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException -import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibDependencyException -import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException -import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.{InvalidStateException, KinesisClientLibDependencyException, ShutdownException, ThrottlingException} import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason import com.amazonaws.services.kinesis.model.Record -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain +import org.mockito.Mockito._ +import org.scalatest.{BeforeAndAfter, Matchers} +import org.scalatest.mock.MockitoSugar + +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext, TestSuiteBase} +import org.apache.spark.util.{Clock, ManualClock, Utils} /** * Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor @@ -99,6 +90,11 @@ class KinesisReceiverSuite extends 
TestSuiteBase with Matchers with BeforeAndAft ssc.stop() } + test("check serializability of SerializableAWSCredentials") { + Utils.deserialize[SerializableAWSCredentials]( + Utils.serialize(new SerializableAWSCredentials("x", "y"))) + } + test("process records including store and checkpoint") { when(receiverMock.isStopped()).thenReturn(false) when(checkpointStateMock.shouldCheckpoint()).thenReturn(true) From 6e534026963e567f92743c5721de16325645223e Mon Sep 17 00:00:00 2001 From: Sean Owen <sowen@cloudera.com> Date: Thu, 21 May 2015 19:42:51 +0100 Subject: [PATCH 300/320] [SPARK-6416] [DOCS] RDD.fold() requires the operator to be commutative Document current limitation of rdd.fold. This does not resolve SPARK-6416 but just documents the issue. CC JoshRosen Author: Sean Owen <sowen@cloudera.com> Closes #6231 from srowen/SPARK-6416 and squashes the following commits: 9fef39f [Sean Owen] Add comment to other languages; reword to highlight the difference from non-distributed collections and to not suggest it is a bug that is to be fixed da40d84 [Sean Owen] Document current limitation of rdd.fold. --- .../org/apache/spark/api/java/JavaRDDLike.scala | 13 ++++++++++--- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 13 ++++++++++--- python/pyspark/rdd.py | 12 ++++++++++-- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 8bf0627fc420d..74db7643224f5 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -386,9 +386,16 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** * Aggregate the elements of each partition, and then the results for all the partitions, using a - * given associative function and a neutral "zero value". The function op(t1, t2) is allowed to - * modify t1 and return it as its result value to avoid object allocation; however, it should not - * modify t2. + * given associative and commutative function and a neutral "zero value". The function + * op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object + * allocation; however, it should not modify t2. + * + * This behaves somewhat differently from fold operations implemented for non-distributed + * collections in functional languages like Scala. This fold operation may be applied to + * partitions individually, and then fold those results into the final result, rather than + * apply the fold to each element sequentially in some defined ordering. For functions + * that are not commutative, the result may differ from that of a fold applied to a + * non-distributed collection. */ def fold(zeroValue: T)(f: JFunction2[T, T, T]): T = rdd.fold(zeroValue)(f) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index f7fa37e4cdcdc..d772f03f76651 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1015,9 +1015,16 @@ abstract class RDD[T: ClassTag]( /** * Aggregate the elements of each partition, and then the results for all the partitions, using a - * given associative function and a neutral "zero value". The function op(t1, t2) is allowed to - * modify t1 and return it as its result value to avoid object allocation; however, it should not - * modify t2. 
+ * given associative and commutative function and a neutral "zero value". The function + * op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object + * allocation; however, it should not modify t2. + * + * This behaves somewhat differently from fold operations implemented for non-distributed + * collections in functional languages like Scala. This fold operation may be applied to + * partitions individually, and then fold those results into the final result, rather than + * apply the fold to each element sequentially in some defined ordering. For functions + * that are not commutative, the result may differ from that of a fold applied to a + * non-distributed collection. */ def fold(zeroValue: T)(op: (T, T) => T): T = withScope { // Clone the zero value since we will also be serializing it as part of tasks diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 70db4bbe4cbc5..98a8ff8606366 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -813,13 +813,21 @@ def op(x, y): def fold(self, zeroValue, op): """ Aggregate the elements of each partition, and then the results for all - the partitions, using a given associative function and a neutral "zero - value." + the partitions, using a given associative and commutative function and + a neutral "zero value." The function C{op(t1, t2)} is allowed to modify C{t1} and return it as its result value to avoid object allocation; however, it should not modify C{t2}. + This behaves somewhat differently from fold operations implemented + for non-distributed collections in functional languages like Scala. + This fold operation may be applied to partitions individually, and then + fold those results into the final result, rather than apply the fold + to each element sequentially in some defined ordering. For functions + that are not commutative, the result may differ from that of a fold + applied to a non-distributed collection. + >>> from operator import add >>> sc.parallelize([1, 2, 3, 4, 5]).fold(0, add) 15 From 699906e538a3d03636adab546ca86d06d5d89293 Mon Sep 17 00:00:00 2001 From: kaka1992 <kaka_1992@163.com> Date: Thu, 21 May 2015 11:50:39 -0700 Subject: [PATCH 301/320] [SPARK-7394][SQL] Add Pandas style cast (astype) Author: kaka1992 <kaka_1992@163.com> Closes #6313 from kaka1992/astype and squashes the following commits: 73dfd0b [kaka1992] [SPARK-7394] Add Pandas style cast (astype) ad8feb2 [kaka1992] [SPARK-7394] Add Pandas style cast (astype) 4f328b7 [kaka1992] [SPARK-7394] Add Pandas style cast (astype) --- python/pyspark/sql/column.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index d03bb6d33dd03..baf1ecbd0a2fc 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -302,6 +302,8 @@ def cast(self, dataType): raise TypeError("unexpected type: %s" % type(dataType)) return Column(jc) + astype = cast + @ignore_unicode_prefix @since(1.3) def between(self, lowerBound, upperBound): From 4f572008f804068c1a81cc334ff2367dbeae6493 Mon Sep 17 00:00:00 2001 From: Shuo Xiang <shuoxiangpub@gmail.com> Date: Thu, 21 May 2015 12:09:44 -0700 Subject: [PATCH 302/320] [SPARK-7793] [MLLIB] Use getOrElse for getting the threshold of SVM model same issue and fix as in Spark-7694. 
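The fix is the usual `Option` discipline: `toString` used to call `.get` on a threshold that is `None` after `clearThreshold()`, which throws, whereas `getOrElse` renders a default instead. A small self-contained sketch with a hypothetical `DemoModel` (not the actual SVMModel API):

```
import scala.util.Try

// Hypothetical stand-in for a model whose threshold is optional.
class DemoModel(var threshold: Option[Double] = Some(0.0)) {
  def clearThreshold(): this.type = { threshold = None; this }

  // Old style: threshold.get throws NoSuchElementException once the threshold is cleared.
  def describeUnsafe: String = s"numClasses = 2, threshold = ${threshold.get}"

  // Patched style: getOrElse substitutes a printable default instead of failing.
  def describeSafe: String = s"numClasses = 2, threshold = ${threshold.getOrElse("None")}"
}

object ThresholdDemo {
  def main(args: Array[String]): Unit = {
    val model = new DemoModel().clearThreshold()
    println(model.describeSafe)          // numClasses = 2, threshold = None
    println(Try(model.describeUnsafe))   // Failure(java.util.NoSuchElementException: None.get)
  }
}
```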
Author: Shuo Xiang <shuoxiangpub@gmail.com> Closes #6321 from coderxiang/nb and squashes the following commits: a5e6de4 [Shuo Xiang] use getOrElse for svmmodel.tostring 2cb0177 [Shuo Xiang] Merge remote-tracking branch 'upstream/master' into nb 5f109b4 [Shuo Xiang] Merge remote-tracking branch 'upstream/master' c5c5bfe [Shuo Xiang] Merge remote-tracking branch 'upstream/master' 98804c9 [Shuo Xiang] fix bug in topBykey and update test --- .../main/scala/org/apache/spark/mllib/classification/SVM.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index 33104cf06c6ea..348485560713e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -89,7 +89,7 @@ class SVMModel ( override protected def formatVersion: String = "1.0" override def toString: String = { - s"${super.toString}, numClasses = 2, threshold = ${threshold.get}" + s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}" } } From f6c486aa4b0d3a50b53c110fd63d226fffeb87f7 Mon Sep 17 00:00:00 2001 From: scwf <wangfei1@huawei.com> Date: Thu, 21 May 2015 12:31:58 -0700 Subject: [PATCH 303/320] [SQL] [TEST] udf_java_method failed due to jdk version java.lang.Math.exp(1.0) has different result between jdk versions. so do not use createQueryTest, write a separate test for it. ``` jdk version result 1.7.0_11 2.7182818284590455 1.7.0_05 2.7182818284590455 1.7.0_71 2.718281828459045 ``` Author: scwf <wangfei1@huawei.com> Closes #6274 from scwf/java_method and squashes the following commits: 3dd2516 [scwf] address comments 5fa1459 [scwf] style df46445 [scwf] fix test error fcb6d22 [scwf] fix udf_java_method --- .../execution/HiveCompatibilitySuite.scala | 6 +++-- .../sql/hive/execution/HiveQuerySuite.scala | 7 ++---- .../sql/hive/execution/SQLQuerySuite.scala | 23 +++++++++++++++++++ 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index b6245a57074c8..0b1917a392901 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -250,7 +250,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // The isolated classloader seemed to make some of our test reset mechanisms less robust. "combine1", // This test changes compression settings in a way that breaks all subsequent tests. - "load_dyn_part14.*" // These work alone but fail when run with other tests... + "load_dyn_part14.*", // These work alone but fail when run with other tests... 
+ + // the answer is sensitive for jdk version + "udf_java_method" ) ++ HiveShim.compatibilityBlackList /** @@ -877,7 +880,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_int", "udf_isnotnull", "udf_isnull", - "udf_java_method", "udf_lcase", "udf_length", "udf_lessthan", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index e7aec0b188c66..65c6ef03bf041 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -20,13 +20,10 @@ package org.apache.spark.sql.hive.execution import java.io.File import java.util.{Locale, TimeZone} -import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory -import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspectorFactory, StructObjectInspector, ObjectInspector} -import org.scalatest.BeforeAndAfter - import scala.util.Try +import org.scalatest.BeforeAndAfter + import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.{SparkFiles, SparkException} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index fbbf6ba5947dc..ba53ed99beb03 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -814,4 +814,27 @@ class SQLQuerySuite extends QueryTest { sql("SELECT cast(key+2 as Int) from df_analysis A group by cast(key+1 as int)") } } + + // `Math.exp(1.0)` has different result for different jdk version, so not use createQueryTest + test("udf_java_method") { + checkAnswer(sql( + """ + |SELECT java_method("java.lang.String", "valueOf", 1), + | java_method("java.lang.String", "isEmpty"), + | java_method("java.lang.Math", "max", 2, 3), + | java_method("java.lang.Math", "min", 2, 3), + | java_method("java.lang.Math", "round", 2.5), + | java_method("java.lang.Math", "exp", 1.0), + | java_method("java.lang.Math", "floor", 1.9) + |FROM src tablesample (1 rows) + """.stripMargin), + Row( + "1", + "true", + java.lang.Math.max(2, 3).toString, + java.lang.Math.min(2, 3).toString, + java.lang.Math.round(2.5).toString, + java.lang.Math.exp(1.0).toString, + java.lang.Math.floor(1.9).toString)) + } } From 15680aeed425c900a5de34d12b61929d1e5df607 Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Thu, 21 May 2015 20:34:20 +0100 Subject: [PATCH 304/320] [SPARK-7775] YARN AM negative sleep exception ``` SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory] Exception in thread "Reporter" java.lang.IllegalArgumentException: timeout value is negative at java.lang.Thread.sleep(Native Method) at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$1.run(ApplicationMaster.scala:356) ``` This kills the reporter thread. This is caused by #6082 (merged into master branch only). 
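The root cause is the unbounded doubling of the allocation polling interval: the `Long` eventually overflows to a negative value, and `Thread.sleep` rejects negative timeouts, which is what kills the reporter thread. A minimal sketch of the overflow and of the capped doubling the patch switches to (illustrative constants and names, not the actual ApplicationMaster code):

```
// Illustrative model of the reporter thread's backoff between allocation polls.
object BackoffDemo {
  val initialAllocationInterval = 200L   // ms (assumed value for illustration)
  val heartbeatInterval = 3000L          // ms (assumed value for illustration)

  // Buggy variant: the interval is doubled without bound, so the Long eventually
  // overflows to a negative value and Thread.sleep(negative) throws.
  def buggySleeps: Iterator[Long] =
    Iterator.iterate(initialAllocationInterval)(_ * 2)
      .map(math.min(heartbeatInterval, _))

  // Patched variant: double the *capped* value, so the interval never exceeds
  // 2 * heartbeatInterval and can never go negative.
  def fixedSleeps: Iterator[Long] =
    Iterator.iterate(initialAllocationInterval)(i => math.min(heartbeatInterval, i) * 2)
      .map(math.min(heartbeatInterval, _))

  def main(args: Array[String]): Unit = {
    println(buggySleeps.take(70).exists(_ < 0))   // true: a negative sleep value appears
    println(fixedSleeps.take(70).forall(_ > 0))   // true: sleep values stay positive
  }
}
```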
Author: Andrew Or <andrew@databricks.com> Closes #6305 from andrewor14/yarn-negative-sleep and squashes the following commits: b970770 [Andrew Or] Use existing cap 56d6e5e [Andrew Or] Avoid negative sleep --- .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 63a6f2e9472c1..af4927b0e4bf7 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -345,7 +345,7 @@ private[spark] class ApplicationMaster( if (numPendingAllocate > 0) { val currentAllocationInterval = math.min(heartbeatInterval, nextAllocationInterval) - nextAllocationInterval *= 2 + nextAllocationInterval = currentAllocationInterval * 2 // avoid overflow currentAllocationInterval } else { nextAllocationInterval = initialAllocationInterval From 6d75ed7e5ccf6c58143de4608115f9a2b3ff6cf4 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" <joseph@databricks.com> Date: Thu, 21 May 2015 13:05:48 -0700 Subject: [PATCH 305/320] [SPARK-7585] [ML] [DOC] VectorIndexer user guide section Added VectorIndexer section to ML user guide. Also added javaCategoryMaps() method and Java unit test for it. CC: mengxr Author: Joseph K. Bradley <joseph@databricks.com> Closes #6255 from jkbradley/vector-indexer-guide and squashes the following commits: dbb8c4c [Joseph K. Bradley] simplified VectorIndexerModel.javaCategoryMaps f692084 [Joseph K. Bradley] Added VectorIndexer section to ML user guide. Also added javaCategoryMaps() method and Java unit test for it. --- docs/ml-features.md | 83 +++++++++++++++++++ .../spark/ml/feature/VectorIndexer.scala | 10 +++ .../ml/feature/JavaVectorIndexerSuite.java | 4 +- 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/docs/ml-features.md b/docs/ml-features.md index 235029d71fadd..06f1ac196b39d 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -535,5 +535,88 @@ encoded = encoder.transform(indexed) </div> </div> +## VectorIndexer + +`VectorIndexer` helps index categorical features in datasets of `Vector`s. +It can both automatically decide which features are categorical and convert original values to category indices. Specifically, it does the following: + +1. Take an input column of type [Vector](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) and a parameter `maxCategories`. +2. Decide which features should be categorical based on the number of distinct values, where features with at most `maxCategories` are declared categorical. +3. Compute 0-based category indices for each categorical feature. +4. Index categorical features and transform original feature values to indices. + +Indexing categorical features allows algorithms such as Decision Trees and Tree Ensembles to treat categorical features appropriately, improving performance. + +Please refer to the [VectorIndexer API docs](api/scala/index.html#org.apache.spark.ml.feature.VectorIndexer) for more details. + +In the example below, we read in a dataset of labeled points and then use `VectorIndexer` to decide which features should be treated as categorical. We transform the categorical feature values to their indices. This transformed data could then be passed to algorithms such as `DecisionTreeRegressor` that handle categorical features. 
+ +<div class="codetabs"> +<div data-lang="scala" markdown="1"> +{% highlight scala %} +import org.apache.spark.ml.feature.VectorIndexer +import org.apache.spark.mllib.util.MLUtils + +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() +val indexer = new VectorIndexer() + .setInputCol("features") + .setOutputCol("indexed") + .setMaxCategories(10) +val indexerModel = indexer.fit(data) +val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet +println(s"Chose ${categoricalFeatures.size} categorical features: " + + categoricalFeatures.mkString(", ")) + +// Create new column "indexed" with categorical values transformed to indices +val indexedData = indexerModel.transform(data) +{% endhighlight %} +</div> + +<div data-lang="java" markdown="1"> +{% highlight java %} +import java.util.Map; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.VectorIndexer; +import org.apache.spark.ml.feature.VectorIndexerModel; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.util.MLUtils; +import org.apache.spark.sql.DataFrame; + +JavaRDD<LabeledPoint> rdd = MLUtils.loadLibSVMFile(sc.sc(), + "data/mllib/sample_libsvm_data.txt").toJavaRDD(); +DataFrame data = sqlContext.createDataFrame(rdd, LabeledPoint.class); +VectorIndexer indexer = new VectorIndexer() + .setInputCol("features") + .setOutputCol("indexed") + .setMaxCategories(10); +VectorIndexerModel indexerModel = indexer.fit(data); +Map<Integer, Map<Double, Integer>> categoryMaps = indexerModel.javaCategoryMaps(); +System.out.print("Chose " + categoryMaps.size() + "categorical features:"); +for (Integer feature : categoryMaps.keySet()) { + System.out.print(" " + feature); +} +System.out.println(); + +// Create new column "indexed" with categorical values transformed to indices +DataFrame indexedData = indexerModel.transform(data); +{% endhighlight %} +</div> + +<div data-lang="python" markdown="1"> +{% highlight python %} +from pyspark.ml.feature import VectorIndexer +from pyspark.mllib.util import MLUtils + +data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() +indexer = VectorIndexer(inputCol="features", outputCol="indexed", maxCategories=10) +indexerModel = indexer.fit(data) + +# Create new column "indexed" with categorical values transformed to indices +indexedData = indexerModel.transform(data) +{% endhighlight %} +</div> +</div> + # Feature Selectors diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 6d1d0524e59ee..e238fb310ed37 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -17,6 +17,11 @@ package org.apache.spark.ml.feature +import java.lang.{Double => JDouble, Integer => JInt} +import java.util.{Map => JMap} + +import scala.collection.JavaConverters._ + import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.attribute._ @@ -248,6 +253,11 @@ class VectorIndexerModel private[ml] ( val categoryMaps: Map[Int, Map[Double, Int]]) extends Model[VectorIndexerModel] with VectorIndexerParams { + /** Java-friendly version of [[categoryMaps]] */ + def javaCategoryMaps: JMap[JInt, JMap[JDouble, JInt]] = { + categoryMaps.mapValues(_.asJava).asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]] + } + /** * Pre-computed feature attributes, with some 
missing info. * In transform(), set attribute name and other info, if available. diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java index 161100134c92d..c7ae5468b9429 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java @@ -19,6 +19,7 @@ import java.io.Serializable; import java.util.List; +import java.util.Map; import org.junit.After; import org.junit.Assert; @@ -64,7 +65,8 @@ public void vectorIndexerAPI() { .setMaxCategories(2); VectorIndexerModel model = indexer.fit(data); Assert.assertEquals(model.numFeatures(), 2); - Assert.assertEquals(model.categoryMaps().size(), 1); + Map<Integer, Map<Double, Integer>> categoryMaps = model.javaCategoryMaps(); + Assert.assertEquals(categoryMaps.size(), 1); DataFrame indexedData = model.transform(data); } } From cdc7c055c931c4c931a11b510de473455f3256da Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 21 May 2015 13:06:53 -0700 Subject: [PATCH 306/320] [SPARK-7498] [MLLIB] add varargs back to setDefault We removed `varargs` due to Java compilation issues. That was a false alarm because I didn't run `build/sbt clean`. So this PR reverts the changes. jkbradley Author: Xiangrui Meng <meng@databricks.com> Closes #6320 from mengxr/SPARK-7498 and squashes the following commits: 74a7259 [Xiangrui Meng] add varargs back to setDefault --- .../src/main/scala/org/apache/spark/ml/param/params.scala | 7 +++---- .../java/org/apache/spark/ml/param/JavaTestParams.java | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index c33b66d31cd4f..94abfcda5cf2a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -438,19 +438,18 @@ trait Params extends Identifiable with Serializable { * @param value the default value */ protected final def setDefault[T](param: Param[T], value: T): this.type = { - defaultParamMap.put(param, value) + defaultParamMap.put(param -> value) this } /** * Sets default values for a list of params. * - * Note: Java developers should use the single-parameter [[setDefault()]]. - * Annotating this with varargs causes compilation failures. See SPARK-7498. * @param paramPairs a list of param pairs that specify params and their default values to set * respectively. Make sure that the params are initialized before this method * gets called. */ + @varargs protected final def setDefault(paramPairs: ParamPair[_]*): this.type = { paramPairs.foreach { p => setDefault(p.param.asInstanceOf[Param[Any]], p.value) @@ -559,7 +558,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) /** * Puts a (param, value) pair (overwrites if the input param exists). */ - def put[T](param: Param[T], value: T): this.type = put(ParamPair(param, value)) + def put[T](param: Param[T], value: T): this.type = put(param -> value) /** * Puts a list of param pairs (overwrites if the input params exists). 
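For context on why the annotation matters to Java callers: a Scala vararg method compiles to a single `Seq` parameter, so Java code cannot call it variadically unless `@varargs` also emits an array-taking bridge method. A minimal sketch with a hypothetical `DemoParams` class (not the real ML `Params` trait):

```
import scala.annotation.varargs

// Hypothetical holder of default values, sketching the two setDefault overloads.
class DemoParams {
  private val defaults = scala.collection.mutable.Map.empty[String, Any]

  // Single-pair variant, directly usable from Java.
  def setDefault(name: String, value: Any): this.type = {
    defaults(name) = value
    this
  }

  // Multi-pair variant: @varargs generates a bridge method taking an array,
  // so Java can also call setDefault(pair1, pair2, ...).
  @varargs
  def setDefault(pairs: (String, Any)*): this.type = {
    pairs.foreach { case (name, value) => defaults(name) = value }
    this
  }

  override def toString: String = defaults.mkString("DemoParams(", ", ", ")")
}

object DemoParamsUsage {
  def main(args: Array[String]): Unit = {
    val params = new DemoParams().setDefault("maxIter" -> 10, "tol" -> 1e-6)
    println(params)
  }
}
```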
diff --git a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java index 3a41890b92d63..947ae3a2ce06f 100644 --- a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java +++ b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java @@ -81,5 +81,6 @@ private void init() { ParamValidators.inArray(validStrings)); setDefault(myIntParam_, 1); setDefault(myDoubleParam_, 0.5); + setDefault(myIntParam().w(1), myDoubleParam().w(0.5)); } } From 311fab6f1b00db1a581d77be5196dd045f93d83d Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Thu, 21 May 2015 13:50:08 -0700 Subject: [PATCH 307/320] [SPARK-7722] [STREAMING] Added Kinesis to style checker Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6325 from tdas/SPARK-7722 and squashes the following commits: 9ab35b2 [Tathagata Das] Fixed styles in Kinesis --- dev/scalastyle | 8 ++++---- .../spark/streaming/kinesis/KinesisReceiverSuite.scala | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/scalastyle b/dev/scalastyle index 7f014c82f14c6..ad93f7e85b27c 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -17,11 +17,11 @@ # limitations under the License. # -echo -e "q\n" | build/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt -echo -e "q\n" | build/sbt -Phive -Phive-thriftserver test:scalastyle >> scalastyle.txt +echo -e "q\n" | build/sbt -Pkinesis-asl -Phive -Phive-thriftserver scalastyle > scalastyle.txt +echo -e "q\n" | build/sbt -Pkinesis-asl -Phive -Phive-thriftserver test:scalastyle >> scalastyle.txt # Check style with YARN built too -echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 scalastyle >> scalastyle.txt -echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 test:scalastyle >> scalastyle.txt +echo -e "q\n" | build/sbt -Pkinesis-asl -Pyarn -Phadoop-2.2 scalastyle >> scalastyle.txt +echo -e "q\n" | build/sbt -Pkinesis-asl -Pyarn -Phadoop-2.2 test:scalastyle >> scalastyle.txt ERRORS=$(cat scalastyle.txt | awk '{if($1~/error/)print}') rm scalastyle.txt diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index cd19c33b90050..2103dca6b766f 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -57,7 +57,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft var checkpointStateMock: KinesisCheckpointState = _ var currentClockMock: Clock = _ - override def beforeFunction() = { + override def beforeFunction(): Unit = { receiverMock = mock[KinesisReceiver] checkpointerMock = mock[IRecordProcessorCheckpointer] checkpointClockMock = mock[ManualClock] From 30f3f556f7161a49baf145c0cbba8c088b512a6a Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Thu, 21 May 2015 13:51:40 -0700 Subject: [PATCH 308/320] [SPARK-7763] [SPARK-7616] [SQL] Persists partition columns into metastore Author: Yin Huai <yhuai@databricks.com> Author: Cheng Lian <lian@databricks.com> Closes #6285 from liancheng/spark-7763 and squashes the following commits: bb2829d [Yin Huai] Fix hashCode. 
d677f7d [Cheng Lian] Fixes Scala style issue 44b283f [Cheng Lian] Adds test case for SPARK-7616 6733276 [Yin Huai] Fix a bug that potentially causes https://issues.apache.org/jira/browse/SPARK-7616. 6cabf3c [Yin Huai] Update unit test. 7e02910 [Yin Huai] Use metastore partition columns and do not hijack maybePartitionSpec. e9a03ec [Cheng Lian] Persists partition columns into metastore --- .../apache/spark/sql/parquet/newParquet.scala | 26 +++++++--- .../apache/spark/sql/sources/commands.scala | 2 + .../org/apache/spark/sql/sources/ddl.scala | 19 +++++-- .../apache/spark/sql/sources/interfaces.scala | 31 ++++++++++-- .../apache/spark/sql/test/SQLTestUtils.scala | 7 +++ .../spark/sql/hive/HiveMetastoreCatalog.scala | 49 +++++++++++++++---- .../spark/sql/hive/execution/commands.scala | 2 + .../spark/sql/hive/orc/OrcRelation.scala | 35 ++++++++----- .../sql/hive/MetastoreDataSourcesSuite.scala | 30 ++++++++++++ .../apache/spark/sql/hive/parquetSuites.scala | 28 +++++------ .../sql/sources/SimpleTextRelation.scala | 2 +- .../sql/sources/hadoopFsRelationSuites.scala | 36 ++++++++++++-- 12 files changed, 211 insertions(+), 56 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index c35b7eff82af5..32986aa3ecc20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -49,8 +49,7 @@ private[sql] class DefaultSource extends HadoopFsRelationProvider { schema: Option[StructType], partitionColumns: Option[StructType], parameters: Map[String, String]): HadoopFsRelation = { - val partitionSpec = partitionColumns.map(PartitionSpec(_, Seq.empty)) - new ParquetRelation2(paths, schema, partitionSpec, parameters)(sqlContext) + new ParquetRelation2(paths, schema, None, partitionColumns, parameters)(sqlContext) } } @@ -118,12 +117,28 @@ private[sql] class ParquetOutputWriter(path: String, context: TaskAttemptContext private[sql] class ParquetRelation2( override val paths: Array[String], private val maybeDataSchema: Option[StructType], + // This is for metastore conversion. private val maybePartitionSpec: Option[PartitionSpec], + override val userDefinedPartitionColumns: Option[StructType], parameters: Map[String, String])( val sqlContext: SQLContext) extends HadoopFsRelation(maybePartitionSpec) with Logging { + private[sql] def this( + paths: Array[String], + maybeDataSchema: Option[StructType], + maybePartitionSpec: Option[PartitionSpec], + parameters: Map[String, String])( + sqlContext: SQLContext) = { + this( + paths, + maybeDataSchema, + maybePartitionSpec, + maybePartitionSpec.map(_.partitionColumns), + parameters)(sqlContext) + } + // Should we merge schemas from all Parquet part-files? 
private val shouldMergeSchemas = parameters.getOrElse(ParquetRelation2.MERGE_SCHEMA, "true").toBoolean @@ -161,7 +176,7 @@ private[sql] class ParquetRelation2( Boolean.box(shouldMergeSchemas), paths.toSet, maybeDataSchema, - maybePartitionSpec) + partitionColumns) } else { Objects.hashCode( Boolean.box(shouldMergeSchemas), @@ -169,7 +184,7 @@ private[sql] class ParquetRelation2( dataSchema, schema, maybeDataSchema, - maybePartitionSpec) + partitionColumns) } } @@ -185,9 +200,6 @@ private[sql] class ParquetRelation2( override def sizeInBytes: Long = metadataCache.dataStatuses.map(_.getLen).sum - override def userDefinedPartitionColumns: Option[StructType] = - maybePartitionSpec.map(_.partitionColumns) - override def prepareJobForWrite(job: Job): OutputWriterFactory = { val conf = ContextUtil.getConfiguration(job) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index d54dbb0831444..498f7538d4f55 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -93,6 +93,8 @@ private[sql] case class InsertIntoHadoopFsRelation( job.setOutputValueClass(classOf[Row]) FileOutputFormat.setOutputPath(job, qualifiedOutputPath) + // We create a DataFrame by applying the schema of relation to the data to make sure. + // We are writing data based on the expected schema, val df = sqlContext.createDataFrame( DataFrame(sqlContext, query).queryExecution.toRdd, relation.schema, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index a13ab74852ff3..5e723122eeab1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -25,7 +25,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.Logging import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.catalyst.AbstractSparkSQLParser -import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Row} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.RunnableCommand @@ -245,12 +245,13 @@ private[sql] object ResolvedDataSource { SparkHadoopUtil.get.globPath(patternPath).map(_.toString).toArray } - val dataSchema = StructType(schema.filterNot(f => partitionColumns.contains(f.name))) + val dataSchema = + StructType(schema.filterNot(f => partitionColumns.contains(f.name))).asNullable dataSource.createRelation( sqlContext, paths, - Some(schema), + Some(dataSchema), maybePartitionsSchema, caseInsensitiveOptions) case dataSource: org.apache.spark.sql.sources.RelationProvider => @@ -320,10 +321,20 @@ private[sql] object ResolvedDataSource { Some(dataSchema.asNullable), Some(partitionColumnsSchema(data.schema, partitionColumns)), caseInsensitiveOptions) + + // For partitioned relation r, r.schema's column ordering is different with the column + // ordering of data.logicalPlan. We need a Project to adjust the ordering. + // So, inside InsertIntoHadoopFsRelation, we can safely apply the schema of r.schema to + // the data. 
+ val project = + Project( + r.schema.map(field => new UnresolvedAttribute(Seq(field.name))), + data.logicalPlan) + sqlContext.executePlan( InsertIntoHadoopFsRelation( r, - data.logicalPlan, + project, partitionColumns.toArray, mode)).toRdd r diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index fcbac0d457950..61fc4e5c19998 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -28,7 +28,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.SerializableWritable -import org.apache.spark.sql._ +import org.apache.spark.sql.{Row, _} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection import org.apache.spark.sql.types.{StructField, StructType} @@ -120,11 +120,13 @@ trait HadoopFsRelationProvider { * Returns a new base relation with the given parameters, a user defined schema, and a list of * partition columns. Note: the parameters' keywords are case insensitive and this insensitivity * is enforced by the Map that is passed to the function. + * + * @param dataSchema Schema of data columns (i.e., columns that are not partition columns). */ def createRelation( sqlContext: SQLContext, paths: Array[String], - schema: Option[StructType], + dataSchema: Option[StructType], partitionColumns: Option[StructType], parameters: Map[String, String]): HadoopFsRelation } @@ -416,8 +418,29 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio final private[sql] def partitionSpec: PartitionSpec = { if (_partitionSpec == null) { _partitionSpec = maybePartitionSpec - .map(spec => spec.copy(partitionColumns = spec.partitionColumns.asNullable)) - .orElse(userDefinedPartitionColumns.map(PartitionSpec(_, Array.empty[Partition]))) + .flatMap { + case spec if spec.partitions.nonEmpty => + Some(spec.copy(partitionColumns = spec.partitionColumns.asNullable)) + case _ => + None + } + .orElse { + // We only know the partition columns and their data types. We need to discover + // partition values. + userDefinedPartitionColumns.map { partitionSchema => + val spec = discoverPartitions() + val castedPartitions = spec.partitions.map { case p @ Partition(values, path) => + val literals = values.toSeq.zip(spec.partitionColumns.map(_.dataType)).map { + case (value, dataType) => Literal.create(value, dataType) + } + val castedValues = partitionSchema.zip(literals).map { case (field, literal) => + Cast(literal, field.dataType).eval() + } + p.copy(values = Row.fromSeq(castedValues)) + } + PartitionSpec(partitionSchema, castedPartitions) + } + } .getOrElse { if (sqlContext.conf.partitionDiscoveryEnabled()) { discoverPartitions() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala index 75d290625ec38..ca66cdc48272d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -78,4 +78,11 @@ trait SQLTestUtils { protected def withTempTable(tableName: String)(f: => Unit): Unit = { try f finally sqlContext.dropTempTable(tableName) } + + /** + * Drops table `tableName` after calling `f`. 
+ */ + protected def withTable(tableName: String)(f: => Unit): Unit = { + try f finally sqlContext.sql(s"DROP TABLE IF EXISTS $tableName") + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 2aa80b47a97e2..5b6840008f1ce 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -66,11 +66,11 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive def schemaStringFromParts: Option[String] = { table.properties.get("spark.sql.sources.schema.numParts").map { numParts => val parts = (0 until numParts.toInt).map { index => - val part = table.properties.get(s"spark.sql.sources.schema.part.${index}").orNull + val part = table.properties.get(s"spark.sql.sources.schema.part.$index").orNull if (part == null) { throw new AnalysisException( - s"Could not read schema from the metastore because it is corrupted " + - s"(missing part ${index} of the schema).") + "Could not read schema from the metastore because it is corrupted " + + s"(missing part $index of the schema, $numParts parts are expected).") } part @@ -89,6 +89,11 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val userSpecifiedSchema = schemaString.map(s => DataType.fromJson(s).asInstanceOf[StructType]) + // We only need names at here since userSpecifiedSchema we loaded from the metastore + // contains partition columns. We can always get datatypes of partitioning columns + // from userSpecifiedSchema. + val partitionColumns = table.partitionColumns.map(_.name) + // It does not appear that the ql client for the metastore has a way to enumerate all the // SerDe properties directly... val options = table.serdeProperties @@ -97,7 +102,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive ResolvedDataSource( hive, userSpecifiedSchema, - Array.empty[String], + partitionColumns.toArray, table.properties("spark.sql.sources.provider"), options) @@ -111,8 +116,8 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive override def refreshTable(databaseName: String, tableName: String): Unit = { // refreshTable does not eagerly reload the cache. It just invalidate the cache. // Next time when we use the table, it will be populated in the cache. - // Since we also cache ParquetRealtions converted from Hive Parquet tables and - // adding converted ParquetRealtions into the cache is not defined in the load function + // Since we also cache ParquetRelations converted from Hive Parquet tables and + // adding converted ParquetRelations into the cache is not defined in the load function // of the cache (instead, we add the cache entry in convertToParquetRelation), // it is better at here to invalidate the cache to avoid confusing waring logs from the // cache loader (e.g. 
cannot find data source provider, which is only defined for @@ -133,12 +138,17 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive def createDataSourceTable( tableName: String, userSpecifiedSchema: Option[StructType], + partitionColumns: Array[String], provider: String, options: Map[String, String], isExternal: Boolean): Unit = { val (dbName, tblName) = processDatabaseAndTableName("default", tableName) val tableProperties = new scala.collection.mutable.HashMap[String, String] tableProperties.put("spark.sql.sources.provider", provider) + + // Saves optional user specified schema. Serialized JSON schema string may be too long to be + // stored into a single metastore SerDe property. In this case, we split the JSON string and + // store each part as a separate SerDe property. if (userSpecifiedSchema.isDefined) { val threshold = conf.schemaStringLengthThreshold val schemaJsonString = userSpecifiedSchema.get.json @@ -146,8 +156,29 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val parts = schemaJsonString.grouped(threshold).toSeq tableProperties.put("spark.sql.sources.schema.numParts", parts.size.toString) parts.zipWithIndex.foreach { case (part, index) => - tableProperties.put(s"spark.sql.sources.schema.part.${index}", part) + tableProperties.put(s"spark.sql.sources.schema.part.$index", part) + } + } + + val metastorePartitionColumns = userSpecifiedSchema.map { schema => + val fields = partitionColumns.map(col => schema(col)) + fields.map { field => + HiveColumn( + name = field.name, + hiveType = HiveMetastoreTypes.toMetastoreType(field.dataType), + comment = "") + }.toSeq + }.getOrElse { + if (partitionColumns.length > 0) { + // The table does not have a specified schema, which means that the schema will be inferred + // when we load the table. So, we are not expecting partition columns and we will discover + // partitions when we load the table. However, if there are specified partition columns, + // we simplily ignore them and provide a warning message.. + logWarning( + s"The schema and partitions of table $tableName will be inferred when it is loaded. " + + s"Specified partition columns (${partitionColumns.mkString(",")}) will be ignored.") } + Seq.empty[HiveColumn] } val tableType = if (isExternal) { @@ -163,7 +194,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive specifiedDatabase = Option(dbName), name = tblName, schema = Seq.empty, - partitionColumns = Seq.empty, + partitionColumns = metastorePartitionColumns, tableType = tableType, properties = tableProperties.toMap, serdeProperties = options)) @@ -199,7 +230,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive val dataSourceTable = cachedDataSourceTables(QualifiedTableName(databaseName, tblName).toLowerCase) // Then, if alias is specified, wrap the table with a Subquery using the alias. - // Othersie, wrap the table with a Subquery using the table name. + // Otherwise, wrap the table with a Subquery using the table name. 
val withAlias = alias.map(a => Subquery(a, dataSourceTable)).getOrElse( Subquery(tableIdent.last, dataSourceTable)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index 6609763343752..0ba94d7b7c649 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -146,6 +146,7 @@ case class CreateMetastoreDataSource( hiveContext.catalog.createDataSourceTable( tableName, userSpecifiedSchema, + Array.empty[String], provider, optionsWithPath, isExternal) @@ -244,6 +245,7 @@ case class CreateMetastoreDataSourceAsSelect( hiveContext.catalog.createDataSourceTable( tableName, Some(resolved.relation.schema), + partitionColumns, provider, optionsWithPath, isExternal) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala index b69e14a179d0a..f03c4cd54e7e6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala @@ -48,15 +48,14 @@ private[sql] class DefaultSource extends HadoopFsRelationProvider { def createRelation( sqlContext: SQLContext, paths: Array[String], - schema: Option[StructType], + dataSchema: Option[StructType], partitionColumns: Option[StructType], parameters: Map[String, String]): HadoopFsRelation = { assert( sqlContext.isInstanceOf[HiveContext], "The ORC data source can only be used with HiveContext.") - val partitionSpec = partitionColumns.map(PartitionSpec(_, Seq.empty[Partition])) - OrcRelation(paths, parameters, schema, partitionSpec)(sqlContext) + new OrcRelation(paths, dataSchema, None, partitionColumns, parameters)(sqlContext) } } @@ -136,23 +135,35 @@ private[orc] class OrcOutputWriter( } @DeveloperApi -private[sql] case class OrcRelation( +private[sql] class OrcRelation( override val paths: Array[String], - parameters: Map[String, String], - maybeSchema: Option[StructType] = None, - maybePartitionSpec: Option[PartitionSpec] = None)( + maybeDataSchema: Option[StructType], + maybePartitionSpec: Option[PartitionSpec], + override val userDefinedPartitionColumns: Option[StructType], + parameters: Map[String, String])( @transient val sqlContext: SQLContext) extends HadoopFsRelation(maybePartitionSpec) with Logging { - override val dataSchema: StructType = maybeSchema.getOrElse { + private[sql] def this( + paths: Array[String], + maybeDataSchema: Option[StructType], + maybePartitionSpec: Option[PartitionSpec], + parameters: Map[String, String])( + sqlContext: SQLContext) = { + this( + paths, + maybeDataSchema, + maybePartitionSpec, + maybePartitionSpec.map(_.partitionColumns), + parameters)(sqlContext) + } + + override val dataSchema: StructType = maybeDataSchema.getOrElse { OrcFileOperator.readSchema( paths.head, Some(sqlContext.sparkContext.hadoopConfiguration)) } - override def userDefinedPartitionColumns: Option[StructType] = - maybePartitionSpec.map(_.partitionColumns) - override def needConversion: Boolean = false override def equals(other: Any): Boolean = other match { @@ -169,7 +180,7 @@ private[sql] case class OrcRelation( paths.toSet, dataSchema, schema, - maybePartitionSpec) + partitionColumns) } override def buildScan( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 30db976a3ae74..c4c7b634964ed 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -670,6 +670,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { catalog.createDataSourceTable( tableName = "wide_schema", userSpecifiedSchema = Some(schema), + partitionColumns = Array.empty[String], provider = "json", options = Map("path" -> "just a dummy path"), isExternal = false) @@ -705,6 +706,35 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { sql(s"drop table $tableName") } + test("Saving partition columns information") { + val df = + sparkContext.parallelize(1 to 10, 4).map { i => + Tuple4(i, i + 1, s"str$i", s"str${i + 1}") + }.toDF("a", "b", "c", "d") + + val tableName = s"partitionInfo_${System.currentTimeMillis()}" + df.write.format("parquet").partitionBy("d", "b").saveAsTable(tableName) + invalidateTable(tableName) + val metastoreTable = catalog.client.getTable("default", tableName) + val expectedPartitionColumns = + StructType(df.schema("d") :: df.schema("b") :: Nil) + val actualPartitionColumns = + StructType( + metastoreTable.partitionColumns.map(c => + StructField(c.name, HiveMetastoreTypes.toDataType(c.hiveType)))) + // Make sure partition columns are correctly stored in metastore. + assert( + expectedPartitionColumns.sameType(actualPartitionColumns), + s"Partitions columns stored in metastore $actualPartitionColumns is not the " + + s"partition columns defined by the saveAsTable operation $expectedPartitionColumns.") + + // Check the content of the saved table. + checkAnswer( + table(tableName).selectExpr("c", "b", "d", "a"), + df.selectExpr("c", "b", "d", "a").collect()) + + sql(s"drop table $tableName") + } test("insert into a table") { def createDF(from: Int, to: Int): DataFrame = diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index 1da990bc959ba..223ba65f47b90 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -435,9 +435,9 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { } test("Caching converted data source Parquet Relations") { - def checkCached(tableIdentifer: catalog.QualifiedTableName): Unit = { + def checkCached(tableIdentifier: catalog.QualifiedTableName): Unit = { // Converted test_parquet should be cached. - catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) match { + catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) match { case null => fail("Converted test_parquet should be cached in the cache.") case logical @ LogicalRelation(parquetRelation: ParquetRelation2) => // OK case other => @@ -463,30 +463,30 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' """.stripMargin) - var tableIdentifer = catalog.QualifiedTableName("default", "test_insert_parquet") + var tableIdentifier = catalog.QualifiedTableName("default", "test_insert_parquet") // First, make sure the converted test_parquet is not cached. 
- assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) // Table lookup will make the table cached. table("test_insert_parquet") - checkCached(tableIdentifer) + checkCached(tableIdentifier) // For insert into non-partitioned table, we will do the conversion, // so the converted test_insert_parquet should be cached. invalidateTable("test_insert_parquet") - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) sql( """ |INSERT INTO TABLE test_insert_parquet |select a, b from jt """.stripMargin) - checkCached(tableIdentifer) + checkCached(tableIdentifier) // Make sure we can read the data. checkAnswer( sql("select * from test_insert_parquet"), sql("select a, b from jt").collect()) // Invalidate the cache. invalidateTable("test_insert_parquet") - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) // Create a partitioned table. sql( @@ -503,8 +503,8 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' """.stripMargin) - tableIdentifer = catalog.QualifiedTableName("default", "test_parquet_partitioned_cache_test") - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + tableIdentifier = catalog.QualifiedTableName("default", "test_parquet_partitioned_cache_test") + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) sql( """ |INSERT INTO TABLE test_parquet_partitioned_cache_test @@ -513,18 +513,18 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { """.stripMargin) // Right now, insert into a partitioned Parquet is not supported in data source Parquet. // So, we expect it is not cached. - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) sql( """ |INSERT INTO TABLE test_parquet_partitioned_cache_test |PARTITION (date='2015-04-02') |select a, b from jt """.stripMargin) - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) // Make sure we can cache the partitioned table. table("test_parquet_partitioned_cache_test") - checkCached(tableIdentifer) + checkCached(tableIdentifier) // Make sure we can read the data. 
checkAnswer( sql("select STRINGField, date, intField from test_parquet_partitioned_cache_test"), @@ -536,7 +536,7 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { """.stripMargin).collect()) invalidateTable("test_parquet_partitioned_cache_test") - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) sql("DROP TABLE test_insert_parquet") sql("DROP TABLE test_parquet_partitioned_cache_test") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala index 09eed6646c55a..2d69b89fd9a9c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala @@ -99,7 +99,7 @@ class SimpleTextRelation( } override def hashCode(): Int = - Objects.hashCode(paths, maybeDataSchema, dataSchema) + Objects.hashCode(paths, maybeDataSchema, dataSchema, partitionColumns) override def buildScan(inputStatuses: Array[FileStatus]): RDD[Row] = { val fields = dataSchema.map(_.dataType) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala index ad4a4826c6b45..c7c8bcd27fbde 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala @@ -22,7 +22,6 @@ import org.apache.hadoop.fs.Path import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHive -import org.apache.spark.sql.parquet.ParquetTest import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ @@ -237,10 +236,6 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils { } } - def withTable(tableName: String)(f: => Unit): Unit = { - try f finally sql(s"DROP TABLE $tableName") - } - test("saveAsTable()/load() - non-partitioned table - Overwrite") { testDF.write.format(dataSourceName).mode(SaveMode.Overwrite) .option("dataSchema", dataSchema.json) @@ -444,6 +439,23 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils { checkAnswer(df, partitionedTestDF.collect()) } } + + test("Partition column type casting") { + withTempPath { file => + val input = partitionedTestDF.select('a, 'b, 'p1.cast(StringType).as('ps), 'p2) + + input + .write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("ps", "p2") + .saveAsTable("t") + + withTempTable("t") { + checkAnswer(table("t"), input.collect()) + } + } + } } class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest { @@ -504,4 +516,18 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest { .load(file.getCanonicalPath)) } } + + test("SPARK-7616: adjust column name order accordingly when saving partitioned table") { + val df = (1 to 3).map(i => (i, s"val_$i", i * 2)).toDF("a", "b", "c") + + df.write + .format("parquet") + .mode(SaveMode.Overwrite) + .partitionBy("c", "a") + .saveAsTable("t") + + withTable("t") { + checkAnswer(table("t"), df.select('b, 'c, 'a).collect()) + } + } } From 3d0cccc85850ca9c79f3e5ff7395bd04d212b063 Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Thu, 21 May 2015 14:08:20 -0700 Subject: [PATCH 309/320] [SPARK-7478] [SQL] 
Added SQLContext.getOrCreate Having a SQLContext singleton would make it easier for applications to use a lazily instantiated single shared instance of SQLContext when needed. It would avoid problems like 1. In REPL/notebook environment, rerunning the line {{val sqlContext = new SQLContext}} multiple times created different contexts while overriding the reference to previous context, leading to issues like registered temp tables going missing. 2. In Streaming, creating SQLContext directly leads to serialization/deserialization issues when attempting to recover from DStream checkpoints. See [SPARK-6770]. Also to get around this problem I had to suggest creating a singleton instance - https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala This can be solved by {{SQLContext.getOrCreate}} which get or creates a new singleton instance of SQLContext using either a given SparkContext or a given SparkConf. rxin marmbrus Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6006 from tdas/SPARK-7478 and squashes the following commits: 25f4da9 [Tathagata Das] Addressed comments. 79fe069 [Tathagata Das] Added comments. c66ca76 [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7478 48adb14 [Tathagata Das] Removed HiveContext.getOrCreate bf8cf50 [Tathagata Das] Fix more bug dec5594 [Tathagata Das] Fixed bug b4e9721 [Tathagata Das] Remove unnecessary import 4ef513b [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into SPARK-7478 d3ea8e4 [Tathagata Das] Added HiveContext 83bc950 [Tathagata Das] Updated tests f82ae81 [Tathagata Das] Fixed test bc72868 [Tathagata Das] Added SQLContext.getOrCreate --- .../org/apache/spark/sql/SQLContext.scala | 47 +++++++++++++++++- .../apache/spark/sql/SQLContextSuite.scala | 49 +++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 304e958192bb9..1ea596dddff02 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import java.beans.Introspector import java.util.Properties +import java.util.concurrent.atomic.AtomicReference import scala.collection.JavaConversions._ import scala.collection.immutable @@ -1270,9 +1271,53 @@ class SQLContext(@transient val sparkContext: SparkContext) //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// - // End of eeprecated methods + // End of deprecated methods //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// + + + // Register a succesfully instantiatd context to the singleton. This should be at the end of + // the class definition so that the singleton is updated only if there is no exception in the + // construction of the instance. + SQLContext.setLastInstantiatedContext(self) } +/** + * This SQLContext object contains utility functions to create a singleton SQLContext instance, + * or to get the last created SQLContext instance. 
+ */ +object SQLContext { + + private val INSTANTIATION_LOCK = new Object() + + /** + * Reference to the last created SQLContext. + */ + @transient private val lastInstantiatedContext = new AtomicReference[SQLContext]() + + /** + * Get the singleton SQLContext if it exists or create a new one using the given SparkContext. + * This function can be used to create a singleton SQLContext object that can be shared across + * the JVM. + */ + def getOrCreate(sparkContext: SparkContext): SQLContext = { + INSTANTIATION_LOCK.synchronized { + if (lastInstantiatedContext.get() == null) { + new SQLContext(sparkContext) + } + } + lastInstantiatedContext.get() + } + + private[sql] def clearLastInstantiatedContext(): Unit = { + INSTANTIATION_LOCK.synchronized { + lastInstantiatedContext.set(null) + } + } + private[sql] def setLastInstantiatedContext(sqlContext: SQLContext): Unit = { + INSTANTIATION_LOCK.synchronized { + lastInstantiatedContext.set(sqlContext) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala new file mode 100644 index 0000000000000..f186bc1c18123 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala @@ -0,0 +1,49 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.spark.sql + +import org.scalatest.{BeforeAndAfterAll, FunSuite} + +import org.apache.spark.sql.test.TestSQLContext + +class SQLContextSuite extends FunSuite with BeforeAndAfterAll { + + private val testSqlContext = TestSQLContext + private val testSparkContext = TestSQLContext.sparkContext + + override def afterAll(): Unit = { + SQLContext.setLastInstantiatedContext(testSqlContext) + } + + test("getOrCreate instantiates SQLContext") { + SQLContext.clearLastInstantiatedContext() + val sqlContext = SQLContext.getOrCreate(testSparkContext) + assert(sqlContext != null, "SQLContext.getOrCreate returned null") + assert(SQLContext.getOrCreate(testSparkContext).eq(sqlContext), + "SQLContext created by SQLContext.getOrCreate not returned by SQLContext.getOrCreate") + } + + test("getOrCreate gets last explicitly instantiated SQLContext") { + SQLContext.clearLastInstantiatedContext() + val sqlContext = new SQLContext(testSparkContext) + assert(SQLContext.getOrCreate(testSparkContext) != null, + "SQLContext.getOrCreate after explicitly created SQLContext returned null") + assert(SQLContext.getOrCreate(testSparkContext).eq(sqlContext), + "SQLContext.getOrCreate after explicitly created SQLContext did not return the context") + } +} From 6b18cdc1b1284b1d48d637d06a1e64829aeb6202 Mon Sep 17 00:00:00 2001 From: Holden Karau <holden@pigscanfly.ca> Date: Thu, 21 May 2015 14:08:57 -0700 Subject: [PATCH 310/320] [SPARK-7711] Add a startTime property to match the corresponding one in Scala Author: Holden Karau <holden@pigscanfly.ca> Closes #6275 from holdenk/SPARK-771-startTime-is-missing-from-pyspark and squashes the following commits: 06662dc [Holden Karau] add mising blank line for style checks 7a87410 [Holden Karau] add back missing newline 7a7876b [Holden Karau] Add a startTime property to match the corresponding one in the Scala SparkContext --- python/pyspark/context.py | 5 +++++ python/pyspark/tests.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 1f2b40b29fafa..aeb7ad4f2f83e 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -291,6 +291,11 @@ def version(self): """ return self._jsc.version() + @property + def startTime(self): + """Return the epoch time when the Spark Context was started.""" + return self._jsc.startTime() + @property def defaultParallelism(self): """ diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index d8e319994cc96..f9fb37f7fc139 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -1809,6 +1809,10 @@ def run(): sc.stop() + def test_startTime(self): + with SparkContext() as sc: + self.assertGreater(sc.startTime, 0) + @unittest.skipIf(not _have_scipy, "SciPy not installed") class SciPyTests(PySparkTestCase): From 5287eec5a6948c0c6e0baaebf35f512324c0679a Mon Sep 17 00:00:00 2001 From: Andrew Or <andrew@databricks.com> Date: Thu, 21 May 2015 14:33:11 -0700 Subject: [PATCH 311/320] [SPARK-7718] [SQL] Speed up partitioning by avoiding closure cleaning According to yhuai we spent 6-7 seconds cleaning closures in a partitioning job that takes 12 seconds. Since we provide these closures in Spark we know for sure they are serializable, so we can bypass the cleaning. 
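The idea, roughly: `RDD.mapPartitions` runs its closure through the closure cleaner before shipping it, so code inside Spark that already knows its function is serializable can construct the `MapPartitionsRDD` directly and skip that step. A minimal sketch of the pattern (Spark-internal only, since `MapPartitionsRDD` is `private[spark]`; the identity function here is purely illustrative):

```
import org.apache.spark.TaskContext
import org.apache.spark.rdd.{MapPartitionsRDD, RDD}
import org.apache.spark.sql.Row

// Sketch: the per-partition function is a plain, known-serializable value, so the
// RDD is built directly instead of calling dataRows.mapPartitions(...), which would
// invoke the closure cleaner on every call.
def withoutClosureCleaning(dataRows: RDD[Row]): RDD[Row] = {
  val mapPartitionsFunc = (_: TaskContext, _: Int, iter: Iterator[Row]) => iter
  new MapPartitionsRDD(dataRows, mapPartitionsFunc, preservesPartitioning = false)
}
```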
Author: Andrew Or <andrew@databricks.com> Closes #6256 from andrewor14/sql-partition-speed-up and squashes the following commits: a82b451 [Andrew Or] Fix style 10f7e3e [Andrew Or] Avoid getting call sites and cleaning closures 17e2943 [Andrew Or] Merge branch 'master' of github.com:apache/spark into sql-partition-speed-up 523f042 [Andrew Or] Skip unnecessary Utils.getCallSites too f7fe143 [Andrew Or] Avoid unnecessary closure cleaning --- .../scala/org/apache/spark/util/Utils.scala | 18 ++++ .../apache/spark/sql/parquet/newParquet.scala | 98 ++++++++++--------- .../sql/sources/DataSourceStrategy.scala | 18 +++- .../spark/sql/sources/SqlNewHadoopRDD.scala | 4 - 4 files changed, 83 insertions(+), 55 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 6a7d1fae3320e..b7a2473dfe920 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2201,6 +2201,24 @@ private[spark] object Utils extends Logging { shutdownHooks.remove(ref) } + /** + * To avoid calling `Utils.getCallSite` for every single RDD we create in the body, + * set a dummy call site that RDDs use instead. This is for performance optimization. + */ + def withDummyCallSite[T](sc: SparkContext)(body: => T): T = { + val oldShortCallSite = sc.getLocalProperty(CallSite.SHORT_FORM) + val oldLongCallSite = sc.getLocalProperty(CallSite.LONG_FORM) + try { + sc.setLocalProperty(CallSite.SHORT_FORM, "") + sc.setLocalProperty(CallSite.LONG_FORM, "") + body + } finally { + // Restore the old ones here + sc.setLocalProperty(CallSite.SHORT_FORM, oldShortCallSite) + sc.setLocalProperty(CallSite.LONG_FORM, oldLongCallSite) + } + } + } private [util] class SparkShutdownHookManager { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 32986aa3ecc20..cb1e60883df1e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -33,6 +33,7 @@ import parquet.hadoop._ import parquet.hadoop.metadata.CompressionCodecName import parquet.hadoop.util.ContextUtil +import org.apache.spark.{Partition => SparkPartition, SerializableWritable, Logging, SparkException} import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD._ @@ -40,7 +41,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.{Row, SQLConf, SQLContext} -import org.apache.spark.{Partition => SparkPartition, SparkEnv, SerializableWritable, Logging, SparkException} +import org.apache.spark.util.Utils private[sql] class DefaultSource extends HadoopFsRelationProvider { override def createRelation( @@ -264,57 +265,58 @@ private[sql] class ParquetRelation2( val footers = inputFiles.map(f => metadataCache.footers(f.getPath)) - // TODO Stop using `FilteringParquetRowInputFormat` and overriding `getPartition`. - // After upgrading to Parquet 1.6.0, we should be able to stop caching `FileStatus` objects and - // footers. Especially when a global arbitrative schema (either from metastore or data source - // DDL) is available. 
- new SqlNewHadoopRDD( - sc = sqlContext.sparkContext, - broadcastedConf = broadcastedConf, - initDriverSideJobFuncOpt = Some(setInputPaths), - initLocalJobFuncOpt = Some(initLocalJobFuncOpt), - inputFormatClass = classOf[FilteringParquetRowInputFormat], - keyClass = classOf[Void], - valueClass = classOf[Row]) { - - val cacheMetadata = useMetadataCache - - @transient val cachedStatuses = inputFiles.map { f => - // In order to encode the authority of a Path containing special characters such as /, - // we need to use the string returned by the URI of the path to create a new Path. - val pathWithAuthority = new Path(f.getPath.toUri.toString) - - new FileStatus( - f.getLen, f.isDir, f.getReplication, f.getBlockSize, f.getModificationTime, - f.getAccessTime, f.getPermission, f.getOwner, f.getGroup, pathWithAuthority) - }.toSeq - - @transient val cachedFooters = footers.map { f => - // In order to encode the authority of a Path containing special characters such as /, - // we need to use the string returned by the URI of the path to create a new Path. - new Footer(new Path(f.getFile.toUri.toString), f.getParquetMetadata) - }.toSeq - - // Overridden so we can inject our own cached files statuses. - override def getPartitions: Array[SparkPartition] = { - val inputFormat = if (cacheMetadata) { - new FilteringParquetRowInputFormat { - override def listStatus(jobContext: JobContext): JList[FileStatus] = cachedStatuses - - override def getFooters(jobContext: JobContext): JList[Footer] = cachedFooters + Utils.withDummyCallSite(sqlContext.sparkContext) { + // TODO Stop using `FilteringParquetRowInputFormat` and overriding `getPartition`. + // After upgrading to Parquet 1.6.0, we should be able to stop caching `FileStatus` objects + // and footers. Especially when a global arbitrative schema (either from metastore or data + // source DDL) is available. + new SqlNewHadoopRDD( + sc = sqlContext.sparkContext, + broadcastedConf = broadcastedConf, + initDriverSideJobFuncOpt = Some(setInputPaths), + initLocalJobFuncOpt = Some(initLocalJobFuncOpt), + inputFormatClass = classOf[FilteringParquetRowInputFormat], + keyClass = classOf[Void], + valueClass = classOf[Row]) { + + val cacheMetadata = useMetadataCache + + @transient val cachedStatuses = inputFiles.map { f => + // In order to encode the authority of a Path containing special characters such as /, + // we need to use the string returned by the URI of the path to create a new Path. + val pathWithAuthority = new Path(f.getPath.toUri.toString) + + new FileStatus( + f.getLen, f.isDir, f.getReplication, f.getBlockSize, f.getModificationTime, + f.getAccessTime, f.getPermission, f.getOwner, f.getGroup, pathWithAuthority) + }.toSeq + + @transient val cachedFooters = footers.map { f => + // In order to encode the authority of a Path containing special characters such as /, + // we need to use the string returned by the URI of the path to create a new Path. + new Footer(new Path(f.getFile.toUri.toString), f.getParquetMetadata) + }.toSeq + + // Overridden so we can inject our own cached files statuses. 
+ override def getPartitions: Array[SparkPartition] = { + val inputFormat = if (cacheMetadata) { + new FilteringParquetRowInputFormat { + override def listStatus(jobContext: JobContext): JList[FileStatus] = cachedStatuses + override def getFooters(jobContext: JobContext): JList[Footer] = cachedFooters + } + } else { + new FilteringParquetRowInputFormat } - } else { - new FilteringParquetRowInputFormat - } - val jobContext = newJobContext(getConf(isDriverSide = true), jobId) - val rawSplits = inputFormat.getSplits(jobContext) + val jobContext = newJobContext(getConf(isDriverSide = true), jobId) + val rawSplits = inputFormat.getSplits(jobContext) - Array.tabulate[SparkPartition](rawSplits.size) { i => - new SqlNewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) + Array.tabulate[SparkPartition](rawSplits.size) { i => + new SqlNewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) + } } - } - }.values + }.values + } } private class MetadataCache { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala index 550090d22d551..c03649d00bbae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.sources -import org.apache.spark.{SerializableWritable, Logging} +import org.apache.spark.{Logging, SerializableWritable, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.rdd.{RDD, UnionRDD} +import org.apache.spark.rdd.{MapPartitionsRDD, RDD, UnionRDD} import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.types.{StringType, StructType, UTF8String} import org.apache.spark.sql.{SaveMode, Strategy, execution, sources} +import org.apache.spark.util.Utils /** * A Strategy for planning scans over data sources defined using the sources API. @@ -197,7 +198,10 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { } } - dataRows.mapPartitions { iterator => + // Since we know for sure that this closure is serializable, we can avoid the overhead + // of cleaning a closure for each RDD by creating our own MapPartitionsRDD. Functionally + // this is equivalent to calling `dataRows.mapPartitions(mapPartitionsFunc)` (SPARK-7718). + val mapPartitionsFunc = (_: TaskContext, _: Int, iterator: Iterator[Row]) => { val dataTypes = requiredColumns.map(schema(_).dataType) val mutableRow = new SpecificMutableRow(dataTypes) iterator.map { dataRow => @@ -209,6 +213,14 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { mutableRow.asInstanceOf[expressions.Row] } } + + // This is an internal RDD whose call site the user should not be concerned with + // Since we create many of these (one per partition), the time spent on computing + // the call site may add up. 
+ Utils.withDummyCallSite(dataRows.sparkContext) { + new MapPartitionsRDD(dataRows, mapPartitionsFunc, preservesPartitioning = false) + } + } else { dataRows } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala index 0c7bb6e50cd98..a74a98631da35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala @@ -75,10 +75,6 @@ private[sql] class SqlNewHadoopRDD[K, V]( with SparkHadoopMapReduceUtil with Logging { - if (initLocalJobFuncOpt.isDefined) { - sc.clean(initLocalJobFuncOpt.get) - } - protected def getJob(): Job = { val conf: Configuration = broadcastedConf.value.value // "new Job" will make a copy of the conf. Then, it is From 5a3c04bb92e21bd221a75c4ae13a71f7d4716b44 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh <viirya@gmail.com> Date: Thu, 21 May 2015 23:12:00 +0100 Subject: [PATCH 312/320] [SPARK-7800] isDefined should not marked too early in putNewKey JIRA: https://issues.apache.org/jira/browse/SPARK-7800 `isDefined` is marked as true twice in `Location.putNewKey`. The first one is unnecessary and will cause problem because it is too early and before some assert checking. E.g., if an attempt with incorrect `keyLengthBytes` marks `isDefined` as true, the location can not be used later. ping JoshRosen Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #6324 from viirya/dup_isdefined and squashes the following commits: cbfe03b [Liang-Chi Hsieh] isDefined should not marked too early in putNewKey. --- .../main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java | 1 - 1 file changed, 1 deletion(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index bd4ca74cc7764..0b4d8d286f5f9 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -429,7 +429,6 @@ public void putNewKey( long valueBaseOffset, int valueLengthBytes) { assert (!isDefined) : "Can only set value once for a key"; - isDefined = true; assert (keyLengthBytes % 8 == 0); assert (valueLengthBytes % 8 == 0); if (size == MAX_CAPACITY) { From 147b6be3b6d464dfc14836c08e690ab021a600de Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Thu, 21 May 2015 15:40:58 -0700 Subject: [PATCH 313/320] [BUILD] Always run SQL tests in master build. Seems our master build does not run HiveCompatibilitySuite (because _RUN_SQL_TESTS is not set). This PR introduces a property `AMP_JENKINS_PRB` to differentiate a PR build and a regular build. If a build is a regular one, we always set _RUN_SQL_TESTS to true. cc JoshRosen nchammas Author: Yin Huai <yhuai@databricks.com> Closes #5955 from yhuai/runSQLTests and squashes the following commits: 3d399bc [Yin Huai] Always run SQL tests in master build. 
--- dev/run-tests | 41 ++++++++++++++++++++++++----------------- dev/run-tests-jenkins | 2 ++ 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/dev/run-tests b/dev/run-tests index 44d802782c4a4..b444e74706b65 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -82,24 +82,31 @@ export SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Pkinesis-asl" if [ -n "$AMPLAB_JENKINS" ]; then git fetch origin master:master - sql_diffs=$( - git diff --name-only master \ - | grep -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh" - ) - - non_sql_diffs=$( - git diff --name-only master \ - | grep -v -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh" - ) - - if [ -n "$sql_diffs" ]; then - echo "[info] Detected changes in SQL. Will run Hive test suite." - _RUN_SQL_TESTS=true - - if [ -z "$non_sql_diffs" ]; then - echo "[info] Detected no changes except in SQL. Will only run SQL tests." - _SQL_TESTS_ONLY=true + # AMP_JENKINS_PRB indicates if the current build is a pull request build. + if [ -n "$AMP_JENKINS_PRB" ]; then + # It is a pull request build. + sql_diffs=$( + git diff --name-only master \ + | grep -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh" + ) + + non_sql_diffs=$( + git diff --name-only master \ + | grep -v -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh" + ) + + if [ -n "$sql_diffs" ]; then + echo "[info] Detected changes in SQL. Will run Hive test suite." + _RUN_SQL_TESTS=true + + if [ -z "$non_sql_diffs" ]; then + echo "[info] Detected no changes except in SQL. Will only run SQL tests." + _SQL_TESTS_ONLY=true + fi fi + else + # It is a regular build. We should run SQL tests. + _RUN_SQL_TESTS=true fi fi diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index f452ab66efcd8..8b2a44fd72ba5 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -185,6 +185,8 @@ done # run tests { + # Marks this build is a pull request build. + export AMP_JENKINS_PRB=true timeout "${TESTS_TIMEOUT}" ./dev/run-tests test_result="$?" From 347b50106bd1bcd40049f1ca29cefbb0baf53413 Mon Sep 17 00:00:00 2001 From: Yin Huai <yhuai@databricks.com> Date: Fri, 22 May 2015 07:10:26 +0800 Subject: [PATCH 314/320] [SPARK-7737] [SQL] Use leaf dirs having data files to discover partitions. https://issues.apache.org/jira/browse/SPARK-7737 cc liancheng Author: Yin Huai <yhuai@databricks.com> Closes #6329 from yhuai/spark-7737 and squashes the following commits: 7e0dfc7 [Yin Huai] Use leaf dirs having data files to discover partitions. 
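The effect, sketched on an illustrative directory layout: only leaf directories that actually contain data files drive partition discovery, so stray `_temporary` directories left by in-flight jobs no longer break it.

```
// Sketch (paths are illustrative):
//   /base/pi=1/ps=foo/part-r-00001.parquet   <- leaf dir with data, used for discovery
//   /base/pi=1/ps=foo/_temporary/            <- ignored
//   /base/_temporary/                        <- ignored
val df = sqlContext.read.parquet("/base")
df.printSchema()   // schema includes the discovered partition columns pi and ps
```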
--- .../org/apache/spark/sql/sources/interfaces.scala | 7 ++----- .../sql/parquet/ParquetPartitionDiscoverySuite.scala | 10 +++++++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 61fc4e5c19998..aaabbadcd651b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -377,8 +377,6 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio var leafDirToChildrenFiles = mutable.Map.empty[Path, Array[FileStatus]] - var leafDirs = mutable.Map.empty[Path, FileStatus] - def refresh(): Unit = { def listLeafFilesAndDirs(fs: FileSystem, status: FileStatus): Set[FileStatus] = { val (dirs, files) = fs.listStatus(status.getPath).partition(_.isDir) @@ -386,7 +384,6 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio files.toSet ++ leafDirs ++ dirs.flatMap(dir => listLeafFilesAndDirs(fs, dir)) } - leafDirs.clear() leafFiles.clear() // We don't filter files/directories like _temporary/_SUCCESS here, as specific data sources @@ -399,7 +396,6 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio } val (dirs, files) = statuses.partition(_.isDir) - leafDirs ++= dirs.map(d => d.getPath -> d).toMap leafFiles ++= files.map(f => f.getPath -> f).toMap leafDirToChildrenFiles ++= files.groupBy(_.getPath.getParent) } @@ -484,7 +480,8 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio } private def discoverPartitions(): PartitionSpec = { - val leafDirs = fileStatusCache.leafDirs.keys.toSeq + // We use leaf dirs containing data files to discover the schema. + val leafDirs = fileStatusCache.leafDirToChildrenFiles.keys.toSeq PartitioningUtils.parsePartitions(leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala index 907dbb0119b40..90d4528efca48 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.sql.parquet +import java.io.File + import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.fs.Path @@ -175,11 +177,17 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { pi <- Seq(1, 2) ps <- Seq("foo", "bar") } { + val dir = makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps) makeParquetFile( (1 to 10).map(i => ParquetData(i, i.toString)), - makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) + dir) + // Introduce _temporary dir to test the robustness of the schema discovery process. + new File(dir.toString, "_temporary").mkdir() } + // Introduce _temporary dir to the base dir the robustness of the schema discovery process. 
+ new File(base.getCanonicalPath, "_temporary").mkdir() + println("load the partitioned table") read.parquet(base.getCanonicalPath).registerTempTable("t") withTempTable("t") { From d68ea24d60ce1aa55b06a8c107f42544d696eb41 Mon Sep 17 00:00:00 2001 From: Tathagata Das <tathagata.das1565@gmail.com> Date: Thu, 21 May 2015 17:41:31 -0700 Subject: [PATCH 315/320] [SPARK-7776] [STREAMING] Added shutdown hook to StreamingContext Shutdown hook to stop SparkContext was added recently. This results in ugly errors when a streaming application is terminated by ctrl-C. ``` Exception in thread "Thread-27" org.apache.spark.SparkException: Job cancelled because SparkContext was shut down at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:736) at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:735) at scala.collection.mutable.HashSet.foreach(HashSet.scala:79) at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:735) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onStop(DAGScheduler.scala:1468) at org.apache.spark.util.EventLoop.stop(EventLoop.scala:84) at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:1403) at org.apache.spark.SparkContext.stop(SparkContext.scala:1642) at org.apache.spark.SparkContext$$anonfun$3.apply$mcV$sp(SparkContext.scala:559) at org.apache.spark.util.SparkShutdownHook.run(Utils.scala:2266) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(Utils.scala:2236) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(Utils.scala:2236) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(Utils.scala:2236) at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1764) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(Utils.scala:2236) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(Utils.scala:2236) at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(Utils.scala:2236) at scala.util.Try$.apply(Try.scala:161) at org.apache.spark.util.SparkShutdownHookManager.runAll(Utils.scala:2236) at org.apache.spark.util.SparkShutdownHookManager$$anon$6.run(Utils.scala:2218) at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54) ``` This is because the Spark's shutdown hook stops the context, and the streaming jobs fail in the middle. The correct solution is to stop the streaming context before the spark context. This PR adds the shutdown hook to do so with a priority higher than the SparkContext's shutdown hooks priority. Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #6307 from tdas/SPARK-7776 and squashes the following commits: e3d5475 [Tathagata Das] Added conf to specify graceful shutdown 4c18652 [Tathagata Das] Added shutdown hook to StreamingContxt. 
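For applications, the only knob is the new `spark.streaming.stopGracefullyOnShutdown` configuration read by the hook; a short usage sketch (app name and batch interval are illustrative):

```
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Sketch: opt in to a graceful stop (wait for received data to be processed) when
// the JVM shuts down, e.g. on ctrl-C. The default is "false", keeping today's behavior.
val conf = new SparkConf()
  .setAppName("ShutdownHookExample")
  .set("spark.streaming.stopGracefullyOnShutdown", "true")
val ssc = new StreamingContext(conf, Seconds(1))
// ... define input streams and output operations ...
ssc.start()
ssc.awaitTermination()
```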
--- .../spark/streaming/StreamingContext.scala | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 160fc42c57d18..7b77d447ce6df 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -42,7 +42,7 @@ import org.apache.spark.streaming.dstream._ import org.apache.spark.streaming.receiver.{ActorReceiver, ActorSupervisorStrategy, Receiver} import org.apache.spark.streaming.scheduler.{JobScheduler, StreamingListener} import org.apache.spark.streaming.ui.{StreamingJobProgressListener, StreamingTab} -import org.apache.spark.util.CallSite +import org.apache.spark.util.{CallSite, Utils} /** * Main entry point for Spark Streaming functionality. It provides methods used to create @@ -201,6 +201,8 @@ class StreamingContext private[streaming] ( private val startSite = new AtomicReference[CallSite](null) + private var shutdownHookRef: AnyRef = _ + /** * Return the associated Spark context */ @@ -584,6 +586,8 @@ class StreamingContext private[streaming] ( state = StreamingContextState.ACTIVE StreamingContext.setActiveContext(this) } + shutdownHookRef = Utils.addShutdownHook( + StreamingContext.SHUTDOWN_HOOK_PRIORITY)(stopOnShutdown) logInfo("StreamingContext started") case ACTIVE => logWarning("StreamingContext has already been started") @@ -660,6 +664,9 @@ class StreamingContext private[streaming] ( uiTab.foreach(_.detach()) StreamingContext.setActiveContext(null) waiter.notifyStop() + if (shutdownHookRef != null) { + Utils.removeShutdownHook(shutdownHookRef) + } logInfo("StreamingContext stopped successfully") } // Even if we have already stopped, we still need to attempt to stop the SparkContext because @@ -670,6 +677,13 @@ class StreamingContext private[streaming] ( state = STOPPED } } + + private def stopOnShutdown(): Unit = { + val stopGracefully = conf.getBoolean("spark.streaming.stopGracefullyOnShutdown", false) + logInfo(s"Invoking stop(stopGracefully=$stopGracefully) from shutdown hook") + // Do not stop SparkContext, let its own shutdown hook stop it + stop(stopSparkContext = false, stopGracefully = stopGracefully) + } } /** @@ -685,6 +699,8 @@ object StreamingContext extends Logging { */ private val ACTIVATION_LOCK = new Object() + private val SHUTDOWN_HOOK_PRIORITY = Utils.SPARK_CONTEXT_SHUTDOWN_PRIORITY + 1 + private val activeContext = new AtomicReference[StreamingContext](null) private def assertNoOtherContextIsActive(): Unit = { From 17791a58159b3e4619d0367f54a4c5332342658b Mon Sep 17 00:00:00 2001 From: Davies Liu <davies@databricks.com> Date: Thu, 21 May 2015 17:43:08 -0700 Subject: [PATCH 316/320] [SPARK-7783] [SQL] [PySpark] add DataFrame.rollup/cube in Python Author: Davies Liu <davies@databricks.com> Closes #6311 from davies/rollup and squashes the following commits: 0261db1 [Davies Liu] use @since a51ca6b [Davies Liu] Merge branch 'master' of github.com:apache/spark into rollup 8ad5af4 [Davies Liu] Update dataframe.py ade3841 [Davies Liu] add DataFrame.rollup/cube in Python --- python/pyspark/sql/dataframe.py | 48 +++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 3fc7d0048edf6..132db90e69f59 100644 --- a/python/pyspark/sql/dataframe.py +++ 
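These methods delegate to the existing `DataFrame.rollup`/`cube` on the Scala side; for comparison, the equivalent Scala calls look roughly like this (column names are illustrative):

```
// Sketch: given a DataFrame df with columns "name" and "age".
// rollup aggregates over (name, age), (name) and the grand total;
// cube also adds the (age)-only grouping.
val rolledUp = df.rollup("name", "age").count()
val cubed = df.cube("name", "age").count()
rolledUp.show()
cubed.show()
```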
b/python/pyspark/sql/dataframe.py @@ -801,9 +801,53 @@ def groupBy(self, *cols): >>> df.groupBy(['name', df.age]).count().collect() [Row(name=u'Bob', age=5, count=1), Row(name=u'Alice', age=2, count=1)] """ - jdf = self._jdf.groupBy(self._jcols(*cols)) + jgd = self._jdf.groupBy(self._jcols(*cols)) from pyspark.sql.group import GroupedData - return GroupedData(jdf, self.sql_ctx) + return GroupedData(jgd, self.sql_ctx) + + @since(1.4) + def rollup(self, *cols): + """ + Create a multi-dimensional rollup for the current :class:`DataFrame` using + the specified columns, so we can run aggregation on them. + + >>> df.rollup('name', df.age).count().show() + +-----+----+-----+ + | name| age|count| + +-----+----+-----+ + |Alice|null| 1| + | Bob| 5| 1| + | Bob|null| 1| + | null|null| 2| + |Alice| 2| 1| + +-----+----+-----+ + """ + jgd = self._jdf.rollup(self._jcols(*cols)) + from pyspark.sql.group import GroupedData + return GroupedData(jgd, self.sql_ctx) + + @since(1.4) + def cube(self, *cols): + """ + Create a multi-dimensional cube for the current :class:`DataFrame` using + the specified columns, so we can run aggregation on them. + + >>> df.cube('name', df.age).count().show() + +-----+----+-----+ + | name| age|count| + +-----+----+-----+ + | null| 2| 1| + |Alice|null| 1| + | Bob| 5| 1| + | Bob|null| 1| + | null| 5| 1| + | null|null| 2| + |Alice| 2| 1| + +-----+----+-----+ + """ + jgd = self._jdf.cube(self._jcols(*cols)) + from pyspark.sql.group import GroupedData + return GroupedData(jgd, self.sql_ctx) @since(1.3) def agg(self, *exprs): From f5db4b416c922db7a8f1b0c098b4f08647106231 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 21 May 2015 17:59:03 -0700 Subject: [PATCH 317/320] [SPARK-7794] [MLLIB] update RegexTokenizer default settings The previous default is `{gaps: false, pattern: "\\p{L}+|[^\\p{L}\\s]+"}`. The default pattern is hard to understand. This PR changes the default to `{gaps: true, pattern: "\\s+"}`. jkbradley Author: Xiangrui Meng <meng@databricks.com> Closes #6330 from mengxr/SPARK-7794 and squashes the following commits: 5ee7cde [Xiangrui Meng] update RegexTokenizer default settings --- .../apache/spark/ml/feature/Tokenizer.scala | 18 +++++---- .../spark/ml/feature/TokenizerSuite.scala | 32 +++++++-------- python/pyspark/ml/feature.py | 40 +++++++++---------- 3 files changed, 44 insertions(+), 46 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 3f7f4f96fc422..31f3a1aa4c76b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -26,6 +26,8 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} /** * :: AlphaComponent :: * A tokenizer that converts the input string to lowercase and then splits it by white spaces. + * + * @see [[RegexTokenizer]] */ @AlphaComponent class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] { @@ -45,9 +47,9 @@ class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[S /** * :: AlphaComponent :: - * A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default) - * or using it to split the text (set matching to false). Optional parameters also allow filtering - * tokens using a minimal length. 
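In practice a bare `RegexTokenizer` now splits on runs of whitespace; callers who want the old token-matching behavior can opt back in explicitly. A small sketch (column names are illustrative):

```
import org.apache.spark.ml.feature.RegexTokenizer

// New defaults: gaps = true, pattern = "\\s+", i.e. split the text on whitespace.
val splitOnWhitespace = new RegexTokenizer()
  .setInputCol("rawText")
  .setOutputCol("tokens")

// Roughly the previous behavior: repeatedly match tokens instead of splitting.
val matchTokens = new RegexTokenizer()
  .setInputCol("rawText")
  .setOutputCol("tokens")
  .setGaps(false)
  .setPattern("\\p{L}+|[^\\p{L}\\s]+")
```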
+ * A regex based tokenizer that extracts tokens either by using the provided regex pattern to split + * the text (default) or repeatedly matching the regex (if `gaps` is true). + * Optional parameters also allow filtering tokens using a minimal length. * It returns an array of strings that can be empty. */ @AlphaComponent @@ -71,8 +73,8 @@ class RegexTokenizer(override val uid: String) def getMinTokenLength: Int = $(minTokenLength) /** - * Indicates whether regex splits on gaps (true) or matching tokens (false). - * Default: false + * Indicates whether regex splits on gaps (true) or matches tokens (false). + * Default: true * @group param */ val gaps: BooleanParam = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens") @@ -84,8 +86,8 @@ class RegexTokenizer(override val uid: String) def getGaps: Boolean = $(gaps) /** - * Regex pattern used by tokenizer. - * Default: `"\\p{L}+|[^\\p{L}\\s]+"` + * Regex pattern used to match delimiters if [[gaps]] is true or tokens if [[gaps]] is false. + * Default: `"\\s+"` * @group param */ val pattern: Param[String] = new Param(this, "pattern", "regex pattern used for tokenizing") @@ -96,7 +98,7 @@ class RegexTokenizer(override val uid: String) /** @group getParam */ def getPattern: String = $(pattern) - setDefault(minTokenLength -> 1, gaps -> false, pattern -> "\\p{L}+|[^\\p{L}\\s]+") + setDefault(minTokenLength -> 1, gaps -> true, pattern -> "\\s+") override protected def createTransformFunc: String => Seq[String] = { str => val re = $(pattern).r diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala index a46d08d65150f..eabda089d0988 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala @@ -29,35 +29,34 @@ case class TokenizerTestData(rawText: String, wantedTokens: Array[String]) class RegexTokenizerSuite extends FunSuite with MLlibTestSparkContext { import org.apache.spark.ml.feature.RegexTokenizerSuite._ - + test("RegexTokenizer") { - val tokenizer = new RegexTokenizer() + val tokenizer0 = new RegexTokenizer() + .setGaps(false) + .setPattern("\\w+|\\p{Punct}") .setInputCol("rawText") .setOutputCol("tokens") - val dataset0 = sqlContext.createDataFrame(Seq( TokenizerTestData("Test for tokenization.", Array("Test", "for", "tokenization", ".")), TokenizerTestData("Te,st. punct", Array("Te", ",", "st", ".", "punct")) )) - testRegexTokenizer(tokenizer, dataset0) + testRegexTokenizer(tokenizer0, dataset0) val dataset1 = sqlContext.createDataFrame(Seq( TokenizerTestData("Test for tokenization.", Array("Test", "for", "tokenization")), TokenizerTestData("Te,st. punct", Array("punct")) )) + tokenizer0.setMinTokenLength(3) + testRegexTokenizer(tokenizer0, dataset1) - tokenizer.setMinTokenLength(3) - testRegexTokenizer(tokenizer, dataset1) - - tokenizer - .setPattern("\\s") - .setGaps(true) - .setMinTokenLength(0) + val tokenizer2 = new RegexTokenizer() + .setInputCol("rawText") + .setOutputCol("tokens") val dataset2 = sqlContext.createDataFrame(Seq( TokenizerTestData("Test for tokenization.", Array("Test", "for", "tokenization.")), - TokenizerTestData("Te,st. punct", Array("Te,st.", "", "punct")) + TokenizerTestData("Te,st. 
punct", Array("Te,st.", "punct")) )) - testRegexTokenizer(tokenizer, dataset2) + testRegexTokenizer(tokenizer2, dataset2) } } @@ -67,9 +66,8 @@ object RegexTokenizerSuite extends FunSuite { t.transform(dataset) .select("tokens", "wantedTokens") .collect() - .foreach { - case Row(tokens, wantedTokens) => - assert(tokens === wantedTokens) - } + .foreach { case Row(tokens, wantedTokens) => + assert(tokens === wantedTokens) + } } } diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 5511dceb70419..b0479d9b074db 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -446,23 +446,25 @@ def getDegree(self): @ignore_unicode_prefix class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): """ - A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default) - or using it to split the text (set matching to false). Optional parameters also allow filtering - tokens using a minimal length. + A regex based tokenizer that extracts tokens either by using the + provided regex pattern (in Java dialect) to split the text + (default) or repeatedly matching the regex (if gaps is true). + Optional parameters also allow filtering tokens using a minimal + length. It returns an array of strings that can be empty. - >>> df = sqlContext.createDataFrame([("a b c",)], ["text"]) + >>> df = sqlContext.createDataFrame([("a b c",)], ["text"]) >>> reTokenizer = RegexTokenizer(inputCol="text", outputCol="words") >>> reTokenizer.transform(df).head() - Row(text=u'a b c', words=[u'a', u'b', u'c']) + Row(text=u'a b c', words=[u'a', u'b', u'c']) >>> # Change a parameter. >>> reTokenizer.setParams(outputCol="tokens").transform(df).head() - Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) >>> # Temporarily modify a parameter. >>> reTokenizer.transform(df, {reTokenizer.outputCol: "words"}).head() - Row(text=u'a b c', words=[u'a', u'b', u'c']) + Row(text=u'a b c', words=[u'a', u'b', u'c']) >>> reTokenizer.transform(df).head() - Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) >>> # Must use keyword arguments to specify params. 
>>> reTokenizer.setParams("text") Traceback (most recent call last): @@ -472,31 +474,27 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): # a placeholder to make it appear in the generated doc minTokenLength = Param(Params._dummy(), "minTokenLength", "minimum token length (>= 0)") - gaps = Param(Params._dummy(), "gaps", "Set regex to match gaps or tokens") - pattern = Param(Params._dummy(), "pattern", "regex pattern used for tokenizing") + gaps = Param(Params._dummy(), "gaps", "whether regex splits on gaps (True) or matches tokens") + pattern = Param(Params._dummy(), "pattern", "regex pattern (Java dialect) used for tokenizing") @keyword_only - def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", - inputCol=None, outputCol=None): + def __init__(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None): """ - __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \ - inputCol=None, outputCol=None) + __init__(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None) """ super(RegexTokenizer, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RegexTokenizer", self.uid) self.minTokenLength = Param(self, "minTokenLength", "minimum token length (>= 0)") - self.gaps = Param(self, "gaps", "Set regex to match gaps or tokens") - self.pattern = Param(self, "pattern", "regex pattern used for tokenizing") - self._setDefault(minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+") + self.gaps = Param(self, "gaps", "whether regex splits on gaps (True) or matches tokens") + self.pattern = Param(self, "pattern", "regex pattern (Java dialect) used for tokenizing") + self._setDefault(minTokenLength=1, gaps=True, pattern="\\s+") kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @keyword_only - def setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", - inputCol=None, outputCol=None): + def setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None): """ - setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \ - inputCol="input", outputCol="output") + setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None) Sets params for this RegexTokenizer. """ kwargs = self.setParams._input_kwargs From 85b96372cf0fd055f89fc639f45c1f2cb02a378f Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 21 May 2015 18:04:45 -0700 Subject: [PATCH 318/320] [SPARK-7219] [MLLIB] Output feature attributes in HashingTF This PR updates `HashingTF` to output ML attributes that tell the number of features in the output column. We need to expand `UnaryTransformer` to support output metadata. A `df outputMetadata: Metadata` is not sufficient because the metadata may also depends on the input data. Though this is not true for `HashingTF`, I think it is reasonable to update `UnaryTransformer` in a separate PR. `checkParams` is added to verify common requirements for params. I will send a separate PR to use it in other test suites. 
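The practical effect, in a rough sketch (a DataFrame `df` with a string-array column "words" is assumed): the transformed column's schema now carries an `AttributeGroup` whose size equals `numFeatures`, so downstream stages can read the vector width from the metadata instead of guessing it.

```
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.feature.HashingTF

val hashingTF = new HashingTF()
  .setInputCol("words")
  .setOutputCol("features")
  .setNumFeatures(1000)
val output = hashingTF.transform(df)

// The ML attribute group attached to the output column records its size.
val attrGroup = AttributeGroup.fromStructField(output.schema("features"))
assert(attrGroup.numAttributes == Some(1000))
```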
jkbradley Author: Xiangrui Meng <meng@databricks.com> Closes #6308 from mengxr/SPARK-7219 and squashes the following commits: 9bd2922 [Xiangrui Meng] address comments e82a68a [Xiangrui Meng] remove sqlContext from test suite 995535b [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-7219 2194703 [Xiangrui Meng] add test for attributes 178ae23 [Xiangrui Meng] update HashingTF with tests 91a6106 [Xiangrui Meng] WIP --- .../apache/spark/ml/feature/HashingTF.scala | 34 +++++++++--- .../spark/ml/feature/HashingTFSuite.scala | 55 +++++++++++++++++++ .../apache/spark/ml/param/ParamsSuite.scala | 20 +++++++ 3 files changed, 101 insertions(+), 8 deletions(-) create mode 100644 mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 30033ced68a04..8942d45219177 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -18,22 +18,31 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.UnaryTransformer +import org.apache.spark.ml.Transformer +import org.apache.spark.ml.attribute.AttributeGroup +import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} import org.apache.spark.ml.param.{IntParam, ParamValidators} -import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.feature -import org.apache.spark.mllib.linalg.{Vector, VectorUDT} -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.{udf, col} +import org.apache.spark.sql.types.{ArrayType, StructType} /** * :: AlphaComponent :: * Maps a sequence of terms to their term frequencies using the hashing trick. */ @AlphaComponent -class HashingTF(override val uid: String) extends UnaryTransformer[Iterable[_], Vector, HashingTF] { +class HashingTF(override val uid: String) extends Transformer with HasInputCol with HasOutputCol { def this() = this(Identifiable.randomUID("hashingTF")) + /** @group setParam */ + def setInputCol(value: String): this.type = set(inputCol, value) + + /** @group setParam */ + def setOutputCol(value: String): this.type = set(outputCol, value) + /** * Number of features. Should be > 0. 
* (default = 2^18^) @@ -50,10 +59,19 @@ class HashingTF(override val uid: String) extends UnaryTransformer[Iterable[_], /** @group setParam */ def setNumFeatures(value: Int): this.type = set(numFeatures, value) - override protected def createTransformFunc: Iterable[_] => Vector = { + override def transform(dataset: DataFrame): DataFrame = { + val outputSchema = transformSchema(dataset.schema) val hashingTF = new feature.HashingTF($(numFeatures)) - hashingTF.transform + val t = udf { terms: Seq[_] => hashingTF.transform(terms) } + val metadata = outputSchema($(outputCol)).metadata + dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata)) } - override protected def outputDataType: DataType = new VectorUDT() + override def transformSchema(schema: StructType): StructType = { + val inputType = schema($(inputCol)).dataType + require(inputType.isInstanceOf[ArrayType], + s"The input column must be ArrayType, but got $inputType.") + val attrGroup = new AttributeGroup($(outputCol), $(numFeatures)) + SchemaUtils.appendColumn(schema, attrGroup.toStructField()) + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala new file mode 100644 index 0000000000000..2e4beb0bfff63 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import org.scalatest.FunSuite + +import org.apache.spark.ml.attribute.AttributeGroup +import org.apache.spark.ml.param.ParamsSuite +import org.apache.spark.mllib.linalg.{Vector, Vectors} +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.Utils + +class HashingTFSuite extends FunSuite with MLlibTestSparkContext { + + test("params") { + val hashingTF = new HashingTF + ParamsSuite.checkParams(hashingTF, 3) + } + + test("hashingTF") { + val df = sqlContext.createDataFrame(Seq( + (0, "a a b b c d".split(" ").toSeq) + )).toDF("id", "words") + val n = 100 + val hashingTF = new HashingTF() + .setInputCol("words") + .setOutputCol("features") + .setNumFeatures(n) + val output = hashingTF.transform(df) + val attrGroup = AttributeGroup.fromStructField(output.schema("features")) + require(attrGroup.numAttributes === Some(n)) + val features = output.select("features").first().getAs[Vector](0) + // Assume perfect hash on "a", "b", "c", and "d". 
+ def idx(any: Any): Int = Utils.nonNegativeMod(any.##, n) + val expected = Vectors.sparse(n, + Seq((idx("a"), 2.0), (idx("b"), 2.0), (idx("c"), 1.0), (idx("d"), 1.0))) + assert(features ~== expected absTol 1e-14) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala index b96874f3a8821..d270ad7613af1 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala @@ -201,3 +201,23 @@ class ParamsSuite extends FunSuite { assert(inArray(1) && inArray(2) && !inArray(0)) } } + +object ParamsSuite extends FunSuite { + + /** + * Checks common requirements for [[Params.params]]: 1) number of params; 2) params are ordered + * by names; 3) param parent has the same UID as the object's UID; 4) param name is the same as + * the param method name. + */ + def checkParams(obj: Params, expectedNumParams: Int): Unit = { + val params = obj.params + require(params.length === expectedNumParams, + s"Expect $expectedNumParams params but got ${params.length}: ${params.map(_.name).toSeq}.") + val paramNames = params.map(_.name) + require(paramNames === paramNames.sorted) + params.foreach { p => + assert(p.parent === obj.uid) + assert(obj.getParam(p.name) === p) + } + } +} From 956c4c910cb536a02128349f2250d0a5f9924d0c Mon Sep 17 00:00:00 2001 From: Hari Shreedharan <hshreedharan@apache.org> Date: Thu, 21 May 2015 20:24:28 -0500 Subject: [PATCH 319/320] [SPARK-7657] [YARN] Add driver logs links in application UI, in cluster mode. This PR adds the URLs to the driver logs to `SparkListenerApplicationStarted` event, which is later used by the `ExecutorsListener` to populate the URLs to the driver logs in its own state. This info is then used when the UI is rendered to display links to the logs. Author: Hari Shreedharan <hshreedharan@apache.org> Closes #6166 from harishreedharan/am-log-link and squashes the following commits: 943fc4f [Hari Shreedharan] Merge remote-tracking branch 'asf/master' into am-log-link 9e5c04b [Hari Shreedharan] Merge remote-tracking branch 'asf/master' into am-log-link b3f9b9d [Hari Shreedharan] Updated comment based on feedback. 0840a95 [Hari Shreedharan] Move the result and sc.stop back to original location, minor import changes. 537a2f7 [Hari Shreedharan] Add test to ensure the log urls are populated and valid. 4033725 [Hari Shreedharan] Adding comments explaining how node reports are used to get the log urls. 6c5c285 [Hari Shreedharan] Import order. 346f4ea [Hari Shreedharan] Review feedback fixes. 629c1dc [Hari Shreedharan] Cleanup. 99fb1a3 [Hari Shreedharan] Send the log urls in App start event, to ensure that other listeners are not affected. c0de336 [Hari Shreedharan] Ensure new unit test cleans up after itself. 50cdae3 [Hari Shreedharan] Added unit test, made the approach generic. 402e8e4 [Hari Shreedharan] Use `NodeReport` to get the URL for the logs. Also, make the environment variables generic so other cluster managers can use them as well. 1cf338f [Hari Shreedharan] [SPARK-7657][YARN] Add driver link in application UI, in cluster mode. 
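Since the URLs travel on the public listener event, they are also reachable outside the UI; a rough sketch of a listener that consumes them (the listener class and message are illustrative, and `driverLogs` is `None` on backends that do not report logs):

```
import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationStart}

// Sketch: print whatever driver log links the cluster backend published.
class DriverLogLinkListener extends SparkListener {
  override def onApplicationStart(appStart: SparkListenerApplicationStart): Unit = {
    appStart.driverLogs.getOrElse(Map.empty).foreach { case (name, url) =>
      println(s"driver log '$name' available at $url")
    }
  }
}

// Registered on the SparkContext, e.g. sc.addSparkListener(new DriverLogLinkListener())
```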
--- .../scala/org/apache/spark/SparkContext.scala | 2 +- .../spark/scheduler/SchedulerBackend.scala | 7 ++ .../spark/scheduler/SparkListener.scala | 9 ++- .../apache/spark/ui/exec/ExecutorsTab.scala | 12 ++- .../org/apache/spark/util/JsonProtocol.scala | 6 +- .../spark/deploy/yarn/YarnRMClient.scala | 4 +- .../deploy/yarn/YarnSparkHadoopUtil.scala | 7 +- .../cluster/YarnClusterSchedulerBackend.scala | 77 ++++++++++++++++++- .../spark/deploy/yarn/YarnClusterSuite.scala | 24 +++++- 9 files changed, 136 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index cf3820fcb6a35..ad78bdfde2dfb 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1991,7 +1991,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli // Note: this code assumes that the task scheduler has been initialized and has contacted // the cluster manager to get an application ID (in case the cluster manager provides one). listenerBus.post(SparkListenerApplicationStart(appName, Some(applicationId), - startTime, sparkUser, applicationAttemptId)) + startTime, sparkUser, applicationAttemptId, schedulerBackend.getDriverLogUrls)) } /** Post the application end event */ diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala index 646820520ea1b..8801a761afae3 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala @@ -49,4 +49,11 @@ private[spark] trait SchedulerBackend { */ def applicationAttemptId(): Option[String] = None + /** + * Get the URLs for the driver logs. These URLs are used to display the links in the UI + * Executors tab for the driver. 
+ * @return Map containing the log names and their respective URLs + */ + def getDriverLogUrls: Option[Map[String, String]] = None + } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala index 169d4fd3a94f0..863d0befbc19e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala @@ -110,8 +110,13 @@ case class SparkListenerExecutorMetricsUpdate( extends SparkListenerEvent @DeveloperApi -case class SparkListenerApplicationStart(appName: String, appId: Option[String], - time: Long, sparkUser: String, appAttemptId: Option[String]) extends SparkListenerEvent +case class SparkListenerApplicationStart( + appName: String, + appId: Option[String], + time: Long, + sparkUser: String, + appAttemptId: Option[String], + driverLogs: Option[Map[String, String]] = None) extends SparkListenerEvent @DeveloperApi case class SparkListenerApplicationEnd(time: Long) extends SparkListenerEvent diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsTab.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsTab.scala index 0a08b000e2d03..39583af14390d 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsTab.scala @@ -19,7 +19,7 @@ package org.apache.spark.ui.exec import scala.collection.mutable.HashMap -import org.apache.spark.ExceptionFailure +import org.apache.spark.{ExceptionFailure, SparkContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.scheduler._ import org.apache.spark.storage.{StorageStatus, StorageStatusListener} @@ -73,6 +73,16 @@ class ExecutorsListener(storageStatusListener: StorageStatusListener) extends Sp uiData.finishReason = Some(executorRemoved.reason) } + override def onApplicationStart(applicationStart: SparkListenerApplicationStart): Unit = { + applicationStart.driverLogs.foreach { logs => + val storageStatus = storageStatusList.find { s => + s.blockManagerId.executorId == SparkContext.LEGACY_DRIVER_IDENTIFIER || + s.blockManagerId.executorId == SparkContext.DRIVER_IDENTIFIER + } + storageStatus.foreach { s => executorToLogUrls(s.blockManagerId.executorId) = logs.toMap } + } + } + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = synchronized { val eid = taskStart.taskInfo.executorId executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 0) + 1 diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index 3f162d1f6c3eb..adf69a4e78e71 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -196,7 +196,8 @@ private[spark] object JsonProtocol { ("App ID" -> applicationStart.appId.map(JString(_)).getOrElse(JNothing)) ~ ("Timestamp" -> applicationStart.time) ~ ("User" -> applicationStart.sparkUser) ~ - ("App Attempt ID" -> applicationStart.appAttemptId.map(JString(_)).getOrElse(JNothing)) + ("App Attempt ID" -> applicationStart.appAttemptId.map(JString(_)).getOrElse(JNothing)) ~ + ("Driver Logs" -> applicationStart.driverLogs.map(mapToJson).getOrElse(JNothing)) } def applicationEndToJson(applicationEnd: SparkListenerApplicationEnd): JValue = { @@ -570,7 +571,8 @@ private[spark] object JsonProtocol { val time = (json \ "Timestamp").extract[Long] val sparkUser = (json \ 
"User").extract[String] val appAttemptId = Utils.jsonOption(json \ "App Attempt ID").map(_.extract[String]) - SparkListenerApplicationStart(appName, appId, time, sparkUser, appAttemptId) + val driverLogs = Utils.jsonOption(json \ "Driver Logs").map(mapFromJson) + SparkListenerApplicationStart(appName, appId, time, sparkUser, appAttemptId, driverLogs) } def applicationEndFromJson(json: JValue): SparkListenerApplicationEnd = { diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala index b134751366522..ffe71dfd7d257 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala @@ -89,9 +89,7 @@ private[spark] class YarnRMClient(args: ApplicationMasterArguments) extends Logg /** Returns the attempt ID. */ def getAttemptId(): ApplicationAttemptId = { - val containerIdString = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()) - val containerId = ConverterUtils.toContainerId(containerIdString) - containerId.getApplicationAttemptId() + YarnSparkHadoopUtil.get.getContainerId.getApplicationAttemptId() } /** Returns the configuration for the AmIpFilter to add to the Spark UI. */ diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index ba91872107d0c..5e6531895c7ba 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -33,7 +33,8 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.api.ApplicationConstants import org.apache.hadoop.yarn.api.ApplicationConstants.Environment -import org.apache.hadoop.yarn.api.records.{Priority, ApplicationAccessType} +import org.apache.hadoop.yarn.api.records.{ApplicationAccessType, ContainerId, Priority} +import org.apache.hadoop.yarn.util.ConverterUtils import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.{SecurityManager, SparkConf, SparkException} @@ -136,6 +137,10 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil { tokenRenewer.foreach(_.stop()) } + private[spark] def getContainerId: ContainerId = { + val containerIdString = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()) + ConverterUtils.toContainerId(containerIdString) + } } object YarnSparkHadoopUtil { diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala index aeb218a575455..1ace1a97d5156 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala @@ -17,10 +17,19 @@ package org.apache.spark.scheduler.cluster +import java.net.NetworkInterface + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.yarn.api.records.NodeState +import org.apache.hadoop.yarn.client.api.YarnClient +import org.apache.hadoop.yarn.conf.YarnConfiguration + import org.apache.spark.SparkContext +import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ import org.apache.spark.scheduler.TaskSchedulerImpl -import org.apache.spark.util.IntParam 
+import org.apache.spark.util.{IntParam, Utils} private[spark] class YarnClusterSchedulerBackend( scheduler: TaskSchedulerImpl, @@ -53,4 +62,70 @@ private[spark] class YarnClusterSchedulerBackend( logError("Application attempt ID is not set.") super.applicationAttemptId } + + override def getDriverLogUrls: Option[Map[String, String]] = { + var yarnClientOpt: Option[YarnClient] = None + var driverLogs: Option[Map[String, String]] = None + try { + val yarnConf = new YarnConfiguration(sc.hadoopConfiguration) + val containerId = YarnSparkHadoopUtil.get.getContainerId + yarnClientOpt = Some(YarnClient.createYarnClient()) + yarnClientOpt.foreach { yarnClient => + yarnClient.init(yarnConf) + yarnClient.start() + + // For newer versions of YARN, we can find the HTTP address for a given node by getting a + // container report for a given container. But container reports were only introduced in + // Hadoop 2.4, so we have to get the node reports for all nodes and find the one that runs + // this container. To do that, we compare the node's host against the current host. + // Since the host can have multiple addresses, we need to compare against all of them to + // find out if one matches. + + // Get all the addresses of this node. + val addresses = + NetworkInterface.getNetworkInterfaces.asScala + .flatMap(_.getInetAddresses.asScala) + .toSeq + + // Find a node report that matches one of the addresses. + val nodeReport = + yarnClient.getNodeReports(NodeState.RUNNING).asScala.find { x => + val host = x.getNodeId.getHost + addresses.exists { address => + address.getHostAddress == host || + address.getHostName == host || + address.getCanonicalHostName == host + } + } + + // Now that we have found the report for the Node Manager that the AM is running on, we + // can get the base HTTP address for the Node Manager from the report. + // The format used for the logs for each container is well-known and can be constructed + // using the NM's HTTP address and the container ID. + // The NM may be running several containers, but we can build the URL for the AM using + // the AM's container ID, which we already know.
+ nodeReport.foreach { report => + val httpAddress = report.getHttpAddress + // Look up the appropriate HTTP scheme for the container log URLs. + val yarnHttpPolicy = yarnConf.get( + YarnConfiguration.YARN_HTTP_POLICY_KEY, + YarnConfiguration.YARN_HTTP_POLICY_DEFAULT + ) + val user = Utils.getCurrentUserName() + val httpScheme = if (yarnHttpPolicy == "HTTPS_ONLY") "https://" else "http://" + val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user" + logDebug(s"Base URL for logs: $baseUrl") + driverLogs = Some( + Map("stderr" -> s"$baseUrl/stderr?start=0", "stdout" -> s"$baseUrl/stdout?start=0")) + } + } + } catch { + case e: Exception => + logInfo("Node Report API is not available in the version of YARN being used, so the AM" + + " logs link will not appear in the application UI", e) + } finally { + yarnClientOpt.foreach(_.close()) + } + driverLogs + } } diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index d3c606e0ed998..dcaeb2e43ff41 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -23,6 +23,7 @@ import java.util.concurrent.TimeUnit import scala.collection.JavaConversions._ import scala.collection.mutable +import scala.io.Source import com.google.common.base.Charsets.UTF_8 import com.google.common.io.ByteStreams @@ -33,7 +34,8 @@ import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers} import org.apache.spark.{Logging, SparkConf, SparkContext, SparkException, TestUtils} import org.apache.spark.scheduler.cluster.ExecutorInfo -import org.apache.spark.scheduler.{SparkListenerJobStart, SparkListener, SparkListenerExecutorAdded} +import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationStart, + SparkListenerExecutorAdded} import org.apache.spark.util.Utils /** @@ -290,10 +292,15 @@ class YarnClusterSuite extends FunSuite with BeforeAndAfterAll with Matchers wit private[spark] class SaveExecutorInfo extends SparkListener { val addedExecutorInfos = mutable.Map[String, ExecutorInfo]() + var driverLogs: Option[collection.Map[String, String]] = None override def onExecutorAdded(executor: SparkListenerExecutorAdded) { addedExecutorInfos(executor.executorId) = executor.executorInfo } + + override def onApplicationStart(appStart: SparkListenerApplicationStart): Unit = { + driverLogs = appStart.driverLogs + } } private object YarnClusterDriver extends Logging with Matchers { @@ -314,6 +321,7 @@ private object YarnClusterDriver extends Logging with Matchers { val sc = new SparkContext(new SparkConf() .set("spark.extraListeners", classOf[SaveExecutorInfo].getName) .setAppName("yarn \"test app\" 'with quotes' and \\back\\slashes and $dollarSigns")) + val conf = sc.getConf val status = new File(args(0)) var result = "failure" try { @@ -335,6 +343,20 @@ private object YarnClusterDriver extends Logging with Matchers { executorInfos.foreach { info => assert(info.logUrlMap.nonEmpty) } + + // If we are running in yarn-cluster mode, verify that driver logs are downloadable. + if (conf.get("spark.master") == "yarn-cluster") { + assert(listener.driverLogs.nonEmpty) + val driverLogs = listener.driverLogs.get + assert(driverLogs.size === 2) + assert(driverLogs.containsKey("stderr")) + assert(driverLogs.containsKey("stdout")) + val stderr = driverLogs("stderr") // YARN puts everything in stderr.
+ val lines = Source.fromURL(stderr).getLines() + // Look for a line that contains YarnClusterSchedulerBackend, since that is guaranteed in + // cluster mode. + assert(lines.exists(_.contains("YarnClusterSchedulerBackend"))) + } } } From e4136ea6c457bc74cee312aa14974498ab4633eb Mon Sep 17 00:00:00 2001 From: Mike Dusenberry <dusenberrymw@gmail.com> Date: Thu, 21 May 2015 19:05:04 -0700 Subject: [PATCH 320/320] [DOCS] [MLLIB] Fixing broken link in MLlib Linear Methods documentation. Just a small change: fixed a broken link in the MLlib Linear Methods documentation by removing a newline character between the link title and link address. Author: Mike Dusenberry <dusenberrymw@gmail.com> Closes #6340 from dusenberrymw/Fix_MLlib_Linear_Methods_link and squashes the following commits: 0a57818 [Mike Dusenberry] Fixing broken link in MLlib Linear Methods documentation. --- docs/mllib-linear-methods.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md index 2b2be4d9d0273..8029edca16002 100644 --- a/docs/mllib-linear-methods.md +++ b/docs/mllib-linear-methods.md @@ -785,8 +785,7 @@ gradient descent (`stepSize`, `numIterations`, `miniBatchFraction`). For each o all three possible regularizations (none, L1 or L2). For Logistic Regression, [L-BFGS](api/scala/index.html#org.apache.spark.mllib.optimization.LBFGS) -version is implemented under [LogisticRegressionWithLBFGS] -(api/scala/index.html#org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS), and this +version is implemented under [LogisticRegressionWithLBFGS](api/scala/index.html#org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS), and this version supports both binary and multinomial Logistic Regression while SGD version only supports binary Logistic Regression. However, L-BFGS version doesn't support L1 regularization but SGD one supports L1 regularization. When L1 regularization is not required, L-BFGS version is strongly